Пример #1
0
def test_jlab_ticket_needed_returns_false():
    """Assert ``jlab_ticket_needed`` is ``False`` for a non-listed category.

    The app config is patched so that ``JLAB_ARXIV_CATEGORIES`` contains only
    ``nucl-th``, while the record's single eprint carries ``math.DG``.
    """
    jlab_config = {'JLAB_ARXIV_CATEGORIES': ['nucl-th']}
    eprint = {
        'categories': ['math.DG'],
        'value': '1806.03979',
    }
    record = {'arxiv_eprints': [eprint]}

    with patch.dict(current_app.config, jlab_config):
        workflow_obj = MockObj(record, {})
        engine = MockEng()

        needed = jlab_ticket_needed(workflow_obj, engine)

        assert needed is False
Пример #2
0
def test_is_arxiv_paper_returns_false_if_source_is_not_present_for_hepcrawl():
    """Assert ``is_arxiv_paper`` is falsy for a hepcrawl record with no ``source``."""
    acquisition_source_schema = load_schema('hep')['properties']['acquisition_source']
    record = {'acquisition_source': {'method': 'hepcrawl'}}

    # Guard: the input must validate against the ``hep`` subschema.
    assert validate(record['acquisition_source'], acquisition_source_schema) is None

    workflow_obj = MockObj(record, {})
    engine = MockEng()

    is_arxiv = is_arxiv_paper(workflow_obj, engine)

    assert not is_arxiv
Пример #3
0
def test_article_exists_returns_true_if_something_matched(mock_match):
    """Assert ``article_exists`` is truthy and records the matched control number.

    ``mock_match`` yields a single hit whose ``_source`` has control number
    4328; that number alone must end up in ``extra_data['record_matches']``.
    """
    hit = {'_source': {'control_number': 4328}}
    mock_match.return_value = iter([hit])

    workflow_obj = MockObj({}, {})
    engine = MockEng()

    assert article_exists(workflow_obj, engine)
    assert 'record_matches' in workflow_obj.extra_data
    assert [4328] == workflow_obj.extra_data['record_matches']
Пример #4
0
def test_article_exists_returns_false_if_nothing_matched(mock_match):
    """Assert ``article_exists`` is falsy and stores an empty match list.

    ``mock_match`` yields no hits; ``extra_data['record_matches']`` must
    still be set, to an empty list.
    """
    mock_match.return_value = iter([])

    workflow_obj = MockObj({}, {})
    engine = MockEng()

    found = article_exists(workflow_obj, engine)

    assert not found
    assert 'record_matches' in workflow_obj.extra_data
    assert [] == workflow_obj.extra_data['record_matches']
Пример #5
0
def test_exact_match_returns_true_if_something_matched(mock_match):
    """Assert ``exact_match`` is truthy and stores the hit under ``matches.exact``.

    ``mock_match`` yields a single hit whose ``_source`` has control number
    4328.
    """
    hit = {'_source': {'control_number': 4328}}
    mock_match.return_value = iter([hit])

    workflow_obj = MockObj({}, {})
    engine = MockEng()

    assert exact_match(workflow_obj, engine)
    assert 'matches' in workflow_obj.extra_data

    exact_matches = get_value(workflow_obj.extra_data, 'matches.exact')
    assert [4328] == exact_matches
Пример #6
0
def test_fuzzy_match_returns_false_if_nothing_matched(mock_match, enable_fuzzy_matcher):
    """Assert ``fuzzy_match`` is falsy and stores an empty ``matches.fuzzy`` list.

    ``mock_match`` yields no hits.
    """
    mock_match.return_value = iter([])

    workflow_obj = MockObj({}, {})
    engine = MockEng()

    matched = fuzzy_match(workflow_obj, engine)

    assert not matched
    assert 'matches' in workflow_obj.extra_data

    fuzzy_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')
    assert [] == fuzzy_matches
def test_wait_webcoll_halts_the_workflow_engine_when_in_production_mode():
    """Assert ``wait_webcoll`` returns ``None`` and sets the engine message.

    Runs with ``PRODUCTION_MODE`` patched to ``True`` in the app config.
    """
    with patch.dict(current_app.config, {'PRODUCTION_MODE': True}):
        workflow_obj = MockObj({}, {})
        engine = MockEng()

        outcome = wait_webcoll(workflow_obj, engine)

        assert outcome is None
        assert 'Waiting for webcoll.' == engine.msg
def test_refextract_from_pdf(mock_get_pdf_in_workflow):
    """Assert ``refextract`` marks the first extracted raw reference as ``arXiv``.

    ``mock_get_pdf_in_workflow`` is pointed at the ``1704.00452.pdf`` fixture
    and the record's acquisition source is ``arXiv``.
    """
    pdf_path = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1704.00452.pdf'))
    mock_get_pdf_in_workflow.return_value = pdf_path

    acquisition_source_schema = load_schema('hep')['properties']['acquisition_source']
    record = {'acquisition_source': {'source': 'arXiv'}}
    assert validate(record['acquisition_source'], acquisition_source_schema) is None

    workflow_obj = MockObj(record, {})
    engine = MockEng()

    assert refextract(workflow_obj, engine) is None

    first_raw_ref = workflow_obj.data['references'][0]['raw_refs'][0]
    assert first_raw_ref['source'] == 'arXiv'
def test_populate_submission_document_does_not_duplicate_documents():
    """Assert two ``populate_submission_document`` runs leave a single document.

    The submission PDF URL is served by ``requests_mock`` from the
    ``1605.03844.pdf`` fixture and the task is run twice on the same object.
    """
    pdf_url = 'http://export.arxiv.org/pdf/1605.03844'

    with requests_mock.Mocker() as requests_mocker:
        pdf_bytes = pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1605.03844.pdf'))
        requests_mocker.register_uri('GET', pdf_url, content=pdf_bytes)

        acquisition_source_schema = load_schema('hep')['properties']['acquisition_source']
        acquisition_source = {
            'datetime': '2017-11-30T16:38:43.352370',
            'email': '*****@*****.**',
            'internal_uid': 54252,
            'method': 'submitter',
            'orcid': '0000-0002-2174-4493',
            'source': 'submitter',
            'submission_number': '1',
        }
        assert validate(acquisition_source, acquisition_source_schema) is None

        workflow_obj = MockObj(
            {'acquisition_source': acquisition_source},
            {'submission_pdf': pdf_url},
            files=MockFiles({}),
        )
        engine = MockEng()

        # Run the task twice: the second run must not append another entry.
        assert populate_submission_document(workflow_obj, engine) is None
        assert populate_submission_document(workflow_obj, engine) is None

        expected = [
            {
                'fulltext': True,
                'key': 'fulltext.pdf',
                'original_url': pdf_url,
                'source': 'submitter',
                'url': pdf_url,
            },
        ]
        assert expected == workflow_obj.data['documents']
Пример #10
0
def test_fuzzy_match_returns_true_if_something_matched_with_1_author(mock_match, enable_fuzzy_matcher):
    """Assert ``fuzzy_match`` stores a summary of a matched one-author record.

    ``mock_match`` yields one hit with a title, one author and an
    ``authors_count`` of 1; ``matches.fuzzy`` must hold the control number,
    the title string, the authors and the count.
    """
    hep_properties = load_schema('hep')['properties']

    matched_titles = [{'title': 'title'}]
    matched_authors = [{'full_name': 'Author 1'}]
    assert validate(matched_titles, hep_properties['titles']) is None
    assert validate(matched_authors, hep_properties['authors']) is None

    hit = {
        '_source': {
            'control_number': 4328,
            'titles': matched_titles,
            'authors': matched_authors,
            'authors_count': 1,
        },
    }
    mock_match.return_value = iter([hit])

    workflow_obj = MockObj({}, {})
    engine = MockEng()

    assert fuzzy_match(workflow_obj, engine)
    assert 'matches' in workflow_obj.extra_data

    expected = [
        {
            'control_number': 4328,
            'title': 'title',
            'authors': [{'full_name': 'Author 1'}],
            'authors_count': 1,
        },
    ]
    assert expected == get_value(workflow_obj.extra_data, 'matches.fuzzy')
Пример #11
0
def test_populate_arxiv_document_does_not_duplicate_files_if_called_multiple_times():
    """Assert two ``populate_arxiv_document`` runs leave a single document.

    The arXiv PDF URL for ``1605.03844`` is served by ``requests_mock`` from
    the matching fixture and the task is run twice on the same object.
    """
    pdf_url = 'http://export.arxiv.org/pdf/1605.03844'

    with requests_mock.Mocker() as requests_mocker:
        pdf_bytes = pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1605.03844.pdf'))
        requests_mocker.register_uri('GET', pdf_url, content=pdf_bytes)

        arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        eprint = {
            'categories': ['physics.ins-det'],
            'value': '1605.03844',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458302
        assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        # Run the task twice: the second run must not append another entry.
        assert populate_arxiv_document(workflow_obj, engine) is None
        assert populate_arxiv_document(workflow_obj, engine) is None

        expected = [
            {
                'key': '1605.03844.pdf',
                'fulltext': True,
                'hidden': True,
                'material': 'preprint',
                'original_url': pdf_url,
                'url': pdf_url,
                'source': 'arxiv',
            },
        ]
        assert expected == workflow_obj.data['documents']
Пример #12
0
def test_arxiv_author_list_does_not_produce_latex():
    """Assert ``arxiv_author_list`` yields the expected author from the tarball.

    The ``1802.03388.tar.gz`` fixture is exposed to the task through
    ``MockFiles``. After the task runs, ``obj.data['authors']`` must equal a
    single entry whose ``full_name`` is a unicode string with a non-ASCII
    first letter.
    """
    schema = load_schema('hep')

    filename = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1802.03388.tar.gz'))

    eprints_subschema = schema['properties']['arxiv_eprints']
    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'hep-ex',
                ],
                'value': '1802.03388',
            },
        ],
    }
    # Use the same ``assert ... is None`` form as the other tests in this
    # module so schema checks read uniformly.
    assert validate(data['arxiv_eprints'], eprints_subschema) is None

    extra_data = {}
    files = MockFiles({
        '1802.03388.tar.gz': AttrDict({'file': AttrDict({'uri': filename})})
    })

    authors_subschema = schema['properties']['authors']
    expected_authors = [
        {
            'affiliations': [{'value': 'Lund U.'}],
            'ids': [
                {
                    'value': 'INSPIRE-00061248',
                    'schema': 'INSPIRE ID'
                }
            ],
            'full_name': u'Åkesson, Torsten Paul Ake'
        },
    ]
    # The expected value itself must also be schema-valid.
    assert validate(expected_authors, authors_subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    default_arxiv_author_list = arxiv_author_list()

    assert default_arxiv_author_list(obj, eng) is None
    assert obj.data.get('authors') == expected_authors
def test_is_arxiv_paper_ignores_case_for_hepcrawl():
    """Assert ``is_arxiv_paper`` is truthy for a hepcrawl record sourced ``arXiv``."""
    acquisition_source_schema = load_schema('hep')['properties']['acquisition_source']
    acquisition_source = {
        'method': 'hepcrawl',
        'source': 'arXiv',
    }
    record = {'acquisition_source': acquisition_source}

    # Guard: the input must validate against the ``hep`` subschema.
    assert validate(record['acquisition_source'], acquisition_source_schema) is None

    workflow_obj = MockObj(record, {})
    engine = MockEng()

    is_arxiv = is_arxiv_paper(workflow_obj, engine)

    assert is_arxiv
Пример #14
0
def test_reply_ticket_calls_tickets_reply_when_template_is_not_set(
        mock_reply_ticket, mock_user):
    """Assert ``reply_ticket()`` without a template calls ``mock_reply_ticket``.

    The mock must receive the ticket id and the reason taken from
    ``extra_data``, followed by ``False``.
    """
    mock_user.query.get.return_value = MockUser('*****@*****.**')

    title = {'title': 'Partial Symmetries of Weak Interactions'}
    record = {'titles': [title]}
    ticket_info = {'ticket_id': 1, 'reason': 'reply reason'}

    workflow_obj = MockObj(record, ticket_info)
    engine = MockEng()

    task = reply_ticket()
    task(workflow_obj, engine)

    mock_reply_ticket.assert_called_with(
        ticket_info['ticket_id'], ticket_info['reason'], False)
Пример #15
0
def test_arxiv_fulltext_download_retries_on_error():
    """Assert ``arxiv_fulltext_download`` logs success after an initial HTTP 500.

    ``requests_mock`` answers the PDF URL first with an empty 500 response
    and then with the ``1605.03814.pdf`` fixture and status 200.
    """
    with requests_mock.Mocker() as requests_mocker:
        failure = {'content': '', 'status_code': 500}
        success = {
            'content': pkg_resources.resource_string(
                __name__, os.path.join('fixtures', '1605.03814.pdf')),
            'status_code': 200,
        }
        # Responses are given as a list: the failure first, then the success.
        requests_mocker.register_uri(
            'GET',
            'http://export.arxiv.org/pdf/1605.03814',
            [failure, success],
        )

        arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        eprint = {
            'categories': ['hep-ex'],
            'value': '1605.03814',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458270
        assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        assert arxiv_fulltext_download(workflow_obj, engine) is None

        logged = workflow_obj.log._info.getvalue()
        assert 'PDF retrieved from arXiv for 1605.03814' == logged
Пример #16
0
def test_send_robotupload_removes_references_if_feature_flag_disabled():
    """Assert ``send_robotupload`` converts the record without its references.

    ``record2marcxml`` is patched inside the submission tasks module and the
    robotupload endpoint is served by ``requests_mock``. The task is run on a
    record holding one reference, and the patched converter must have been
    called with the record stripped of ``references``.
    """
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text='[INFO] foo bar baz')

        schema = load_schema('hep')
        subschema = schema['properties']['references']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config), \
                patch('inspirehep.modules.workflows.tasks.submission.record2marcxml') as mock_record2marcxml:
            data = {
                '$schema':
                'http://localhost:5000/schemas/records/hep.json',
                'references': [
                    {
                        'raw_refs': [
                            {
                                'schema':
                                'text',
                                'value':
                                '[1] J. Maldacena and A. Strominger, hep-th/9710014.',
                            },
                        ],
                    },
                ]
            }
            data_without_references = {
                '$schema': 'http://localhost:5000/schemas/records/hep.json',
            }
            extra_data = {}
            assert validate(data['references'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(mode='insert', )

            assert _send_robotupload(obj, eng) is None
            # ``called_with`` is not a Mock assertion: on a Mock it is an
            # auto-created child attribute whose call result is always truthy,
            # so ``assert mock.called_with(...)`` can never fail. Use the real
            # assertion method so the arguments are actually checked.
            mock_record2marcxml.assert_called_with(data_without_references)
def test_download_documents_with_multiple_documents():
    """Assert ``download_documents`` rewrites the URL of each of two documents.

    Both remote URLs are served by ``requests_mock`` from the same
    ``1605.03844.pdf`` fixture. Afterwards each document's ``url`` must be
    the expected ``/api/files/...`` path ending in its own key.
    """
    first_url = 'http://export.arxiv.org/pdf/1605.03844'
    second_url = 'http://export.arxiv.org/pdf/1605.03845'

    with requests_mock.Mocker() as requests_mocker:
        for remote_url in (first_url, second_url):
            requests_mocker.register_uri(
                'GET',
                remote_url,
                content=pkg_resources.resource_string(
                    __name__, os.path.join('fixtures', '1605.03844.pdf')),
            )

        documents_schema = load_schema('hep')['properties']['documents']
        record = {
            'documents': [
                {'key': '1605.03844.pdf', 'url': first_url},
                {'key': '1605.03845.pdf', 'url': second_url},
            ],
        }  # literature/1458302
        assert validate(record['documents'], documents_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        assert download_documents(workflow_obj, engine) is None

        downloaded = workflow_obj.data['documents']

        assert 2 == len(downloaded)
        assert '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03844.pdf' == downloaded[0]['url']
        assert '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03845.pdf' == downloaded[1]['url']
Пример #18
0
def test_arxiv_derive_inspire_categories_appends_categories_with_different_source():
    """Assert an ``arxiv``-sourced category is appended next to an existing one.

    The record starts with ``Theory-Nucl`` from source ``undefined`` and an
    eprint in ``nucl-th``. After ``arxiv_derive_inspire_categories`` runs, the
    list must hold the original entry followed by ``Theory-Nucl`` from source
    ``arxiv``.
    """
    hep_properties = load_schema('hep')['properties']
    arxiv_eprints_schema = hep_properties['arxiv_eprints']
    inspire_categories_schema = hep_properties['inspire_categories']

    eprint = {
        'categories': ['nucl-th'],
        'value': '1605.03898',
    }
    existing_category = {
        'source': 'undefined',
        'term': 'Theory-Nucl',
    }
    record = {
        'arxiv_eprints': [eprint],
        'inspire_categories': [existing_category],
    }  # literature/1458300

    assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None
    assert validate(record['inspire_categories'],
                    inspire_categories_schema) is None

    workflow_obj = MockObj(record, {})
    engine = MockEng()

    assert arxiv_derive_inspire_categories(workflow_obj, engine) is None

    categories = workflow_obj.data['inspire_categories']
    expected = [
        {'source': 'undefined', 'term': 'Theory-Nucl'},
        {'source': 'arxiv', 'term': 'Theory-Nucl'},
    ]

    assert validate(categories, inspire_categories_schema) is None
    assert expected == categories
def test_set_schema_adds_a_schema_from_the_eng_data_type():
    """Assert ``set_schema`` fills ``$schema`` on an empty record.

    The engine is built with ``data_type='hep'`` and the resulting
    ``$schema`` must be the full local ``hep.json`` URL.
    """
    schema_url_subschema = load_schema('hep')['properties']['$schema']

    workflow_obj = MockObj({}, {})
    engine = MockEng(data_type='hep')

    assert set_schema(workflow_obj, engine) is None

    record = workflow_obj.data

    assert validate(record['$schema'], schema_url_subschema) is None
    assert 'http://localhost:5000/schemas/records/hep.json' == record['$schema']
def test_send_robotupload_works_with_hepnames2marc_and_mode_insert():
    """Assert ``send_robotupload`` with ``hepnames2marc`` logs and halts as expected.

    The robotupload endpoint is registered with ``httpretty`` and real network
    connections are disallowed for the duration of the test. After the task
    runs, the info log and the engine message must match the expected
    strings.
    """
    httpretty.HTTPretty.allow_net_connect = False
    try:
        httpretty.register_uri(
            httpretty.POST,
            'http://inspirehep.net/batchuploader/robotupload/insert',
            body='[INFO] foo bar baz')

        schema = load_schema('authors')
        subschema = schema['properties']['arxiv_categories']

        config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, config):
            data = {
                'arxiv_categories': [
                    'hep-th',
                ],
            }
            extra_data = {}
            assert validate(data['arxiv_categories'], subschema) is None

            obj = MockObj(data, extra_data)
            eng = MockEng()

            _send_robotupload = send_robotupload(
                marcxml_processor=hepnames2marc,
                mode='insert',
            )

            assert _send_robotupload(obj, eng) is None

            expected = ('Robotupload sent!' '[INFO] foo bar baz' 'end of upload')
            result = obj.log._info.getvalue()

            assert expected == result

            expected = 'Waiting for robotupload: [INFO] foo bar baz'
            result = eng.msg

            assert expected == result
    finally:
        # Restore the class-level flag even when an assertion above fails;
        # otherwise ``False`` would leak into every test that runs afterwards.
        httpretty.HTTPretty.allow_net_connect = True
Пример #21
0
def test_arxiv_author_list_logs_on_error(mock_os, mock_untar):
    """Assert ``arxiv_author_list`` logs an error when untarring fails.

    ``mock_untar`` is set to raise ``InvalidTarball`` and ``mock_os`` is made
    to return a fresh temporary directory from ``path.abspath``. The task
    must return ``None`` and write the expected message to the error log.
    """
    mock_untar.side_effect = InvalidTarball

    schema = load_schema('hep')
    subschema = schema['properties']['arxiv_eprints']

    data = {
        'arxiv_eprints': [
            {
                'categories': [
                    'hep-th',
                ],
                'value': '1605.07707',
            },
        ],
    }  # synthethic data
    extra_data = {}
    files = MockFiles({
        '1605.07707.tar.gz':
        AttrDict({
            'file':
            AttrDict({
                'uri': 'http://export.arxiv.org/e-print/1605.07707',
            })
        })
    })
    assert validate(data['arxiv_eprints'], subschema) is None

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    default_arxiv_author_list = arxiv_author_list()

    # Create the directory before entering ``try``: if ``mkdtemp`` itself
    # raised inside the ``try``, ``finally`` would hit an unbound
    # ``temporary_dir`` and mask the real error with a NameError.
    temporary_dir = mkdtemp()
    try:
        mock_os.path.abspath.return_value = temporary_dir

        assert default_arxiv_author_list(obj, eng) is None

        expected = 'Invalid tarball http://export.arxiv.org/e-print/1605.07707 for arxiv_id 1605.07707'
        result = obj.log._error.getvalue()

        assert expected == result
    finally:
        rmtree(temporary_dir)
def test_set_schema_does_nothing_when_the_schema_url_is_already_full():
    """Assert ``set_schema`` leaves an already-full ``$schema`` URL unchanged."""
    full_schema_url = 'http://localhost:5000/schemas/records/hep.json'
    schema_url_subschema = load_schema('hep')['properties']['$schema']

    record = {'$schema': full_schema_url}
    assert validate(record['$schema'], schema_url_subschema) is None

    workflow_obj = MockObj(record, {})
    engine = MockEng()

    assert set_schema(workflow_obj, engine) is None

    updated = workflow_obj.data

    assert validate(updated['$schema'], schema_url_subschema) is None
    assert full_schema_url == updated['$schema']
Пример #23
0
def test_reply_ticket_calls_tickets_reply_with_template_when_template_is_set(
        mock_reply_ticket_with_template, mock_user):
    """Assert ``reply_ticket(template=...)`` calls the template-based reply mock.

    The mock must receive the ticket id from ``extra_data``, the template
    path, an empty dict and ``False``.
    """
    mock_user.query.get.return_value = MockUser('*****@*****.**')

    template_path = 'template_path'
    title = {'title': 'Partial Symmetries of Weak Interactions'}
    record = {'titles': [title]}
    ticket_info = {'ticket_id': 1}

    workflow_obj = MockObj(record, ticket_info)
    engine = MockEng()

    task = reply_ticket(template=template_path)
    task(workflow_obj, engine)

    mock_reply_ticket_with_template.assert_called_with(
        ticket_info['ticket_id'], template_path, {}, False)
Пример #24
0
def test_create_ticket_calls_tickets_create_with_template(
        mock_create_ticket_with_template, mock_user):
    """Assert ``create_ticket(template=...)`` calls the template-based create mock.

    The mock must receive ``'Test'``, the mocked user's e-mail, the template
    path, an empty dict, ``None`` and the ``recid`` from ``extra_data``.
    """
    mock_user.query.get.return_value = MockUser('*****@*****.**')

    template_path = 'template_path'
    title = {'title': 'Partial Symmetries of Weak Interactions'}
    record = {'titles': [title]}
    record_info = {'recid': '1'}

    workflow_obj = MockObj(record, record_info)
    engine = MockEng()

    task = create_ticket(template=template_path)
    task(workflow_obj, engine)

    mock_create_ticket_with_template.assert_called_with(
        'Test', '*****@*****.**', template_path, {}, None, record_info['recid'])
def test_classify_paper_does_not_raise_on_unprintable_keywords(
        get_document_in_workflow, higgs_ontology):
    """Run ``classify_paper`` on the ``1802.08709.pdf`` fixture without asserting.

    ``get_document_in_workflow`` is configured as a context manager that
    yields the fixture path; the test passes as long as the task does not
    raise.
    """
    fixture_pdf = pkg_resources.resource_filename(
        __name__, os.path.join('fixtures', '1802.08709.pdf'))

    document_context = get_document_in_workflow.return_value
    document_context.__enter__.return_value = fixture_pdf
    document_context.__exit__.return_value = None

    workflow_obj = MockObj({}, {})
    engine = MockEng()

    task = classify_paper(
        taxonomy=higgs_ontology,
        only_core_tags=False,
        spires=True,
        with_author_keywords=True,
        no_cache=True,
    )
    task(workflow_obj, engine)  # Does not raise.
def test_is_experimental_paper_does_not_raise_if_obj_has_no_arxiv_category():
    """Assert ``is_experimental_paper`` is falsy for an eprint without categories."""
    arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
    record = {'arxiv_eprints': [{'value': '1712.02280'}]}

    # Guard: the input must validate against the ``hep`` subschema.
    assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None

    workflow_obj = MockObj(record, {})
    engine = MockEng()

    is_experimental = is_experimental_paper(workflow_obj, engine)

    assert not is_experimental
Пример #27
0
def test_populate_arxiv_document_logs_on_pdf_not_existing():
    """Assert ``populate_arxiv_document`` logs that no PDF is available.

    ``export.arxiv.org`` answers with status 200 and the contents of the
    ``1707.02785.html`` fixture, while ``arxiv.org`` answers with an empty
    500 response.
    """
    server_error = {'content': '', 'status_code': 500}
    html_page = {
        'content': pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1707.02785.html')),
        'status_code': 200,
    }

    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.get(
            'http://export.arxiv.org/pdf/1707.02785', (html_page, ))
        requests_mocker.get(
            'http://arxiv.org/pdf/1707.02785', (server_error, ))

        arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        eprint = {
            'categories': ['cs.CV'],
            'value': '1707.02785',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458302
        assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        assert populate_arxiv_document(workflow_obj, engine) is None

        logged = workflow_obj.log._info.getvalue()
        assert 'No PDF is available for 1707.02785' == logged
Пример #28
0
def test_fuzzy_match_returns_true_if_something_matched_with_arxiv_eprints(mock_match, enable_fuzzy_matcher):
    """Assert ``fuzzy_match`` stores a summary of a matched record with an eprint.

    ``mock_match`` yields one hit with a title and one arXiv eprint;
    ``matches.fuzzy`` must hold the control number, the title string and the
    eprint value under ``arxiv_eprint``.
    """
    hep_properties = load_schema('hep')['properties']

    matched_titles = [{'title': 'title'}]
    matched_eprints = [
        {
            'categories': ['hep-ph'],
            'value': '1606.09129',
        },
    ]
    assert validate(matched_titles, hep_properties['titles']) is None
    assert validate(matched_eprints, hep_properties['arxiv_eprints']) is None

    hit = {
        '_source': {
            'control_number': 1472986,
            'titles': matched_titles,
            'arxiv_eprints': matched_eprints,
        },
    }
    mock_match.return_value = iter([hit])

    workflow_obj = MockObj({}, {})
    engine = MockEng()

    assert fuzzy_match(workflow_obj, engine)
    assert 'matches' in workflow_obj.extra_data

    expected = [
        {
            'control_number': 1472986,
            'title': 'title',
            'arxiv_eprint': '1606.09129',
        },
    ]
    assert expected == get_value(workflow_obj.extra_data, 'matches.fuzzy')
Пример #29
0
def test_send_robotupload_works_with_mode_insert_on_authors():
    """Assert ``send_robotupload`` in insert mode logs and halts for an authors record.

    The robotupload endpoint is served by ``requests_mock``. After the task
    runs, the info log and the engine message must match the expected
    strings.
    """
    upload_reply = '[INFO] foo bar baz'

    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'POST',
            'http://inspirehep.net/batchuploader/robotupload/insert',
            text=upload_reply)

        arxiv_categories_schema = load_schema('authors')['properties']['arxiv_categories']
        robotupload_config = {
            'LEGACY_ROBOTUPLOAD_URL': 'http://inspirehep.net',
            'PRODUCTION_MODE': True,
        }

        with patch.dict(current_app.config, robotupload_config):
            record = {
                '$schema': 'http://localhost:5000/schemas/records/authors.json',
                'arxiv_categories': ['hep-th'],
            }
            assert validate(record['arxiv_categories'], arxiv_categories_schema) is None

            workflow_obj = MockObj(record, {})
            engine = MockEng()

            task = send_robotupload(mode='insert')

            assert task(workflow_obj, engine) is None

            # The log pieces are expected back to back, with no separator.
            expected_log = ''.join([
                'Robotupload sent!',
                '[INFO] foo bar baz',
                'end of upload',
            ])
            assert expected_log == workflow_obj.log._info.getvalue()

            assert 'Waiting for robotupload: [INFO] foo bar baz' == engine.msg
Пример #30
0
def test_arxiv_fulltext_download_polulates_documents():
    """Assert ``arxiv_fulltext_download`` adds one document for the fetched PDF.

    The arXiv PDF URL for ``1605.03844`` is served by ``requests_mock`` from
    the matching fixture; ``obj.data['documents']`` must then equal the single
    expected entry.
    """
    pdf_url = 'http://export.arxiv.org/pdf/1605.03844'

    with requests_mock.Mocker() as requests_mocker:
        pdf_bytes = pkg_resources.resource_string(
            __name__, os.path.join('fixtures', '1605.03844.pdf'))
        requests_mocker.register_uri('GET', pdf_url, content=pdf_bytes)

        arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']
        eprint = {
            'categories': ['physics.ins-det'],
            'value': '1605.03844',
        }
        record = {'arxiv_eprints': [eprint]}  # literature/1458302
        assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None

        workflow_obj = MockObj(record, {}, files=MockFiles({}))
        engine = MockEng()

        assert arxiv_fulltext_download(workflow_obj, engine) is None

        expected = [
            {
                'fulltext': True,
                'original_url': pdf_url,
                'url': '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03844.pdf',
                'material': 'preprint',
                'source': 'arxiv',
                'key': '1605.03844.pdf',
                'hidden': True,
            },
        ]
        assert expected == workflow_obj.data['documents']