def test_populate_submission_document_without_pdf():
    """Check that no document is attached when the submission URL is not a PDF.

    The submission URL is mocked to serve the ``1707.02785.html`` fixture
    (presumably an HTML page rather than a PDF, judging by the fixture name),
    and the test asserts that ``populate_submission_document`` returns
    ``None`` and leaves the record without any ``documents``.
    """
    submission_url = 'http://export.arxiv.org/pdf/1707.02785'

    with requests_mock.Mocker() as requests_mocker:
        # Serve the local fixture instead of reaching the real host.
        fixture_path = os.path.join('fixtures', '1707.02785.html')
        requests_mocker.register_uri(
            'GET',
            submission_url,
            content=pkg_resources.resource_string(__name__, fixture_path),
        )

        subschema = load_schema('hep')['properties']['acquisition_source']

        acquisition_source = {
            'datetime': '2017-11-30T16:38:43.352370',
            'email': '*****@*****.**',
            'internal_uid': 54252,
            'method': 'submitter',
            'orcid': '0000-0002-2174-4493',
            'source': 'submitter',
            'submission_number': '1',
        }
        data = {'acquisition_source': acquisition_source}
        # Sanity check: the input must be valid against the schema.
        assert validate(acquisition_source, subschema) is None

        extra_data = {'submission_pdf': submission_url}
        files = MockFiles({})

        obj = MockObj(data, extra_data, files=files)
        eng = MockEng()

        outcome = populate_submission_document(obj, eng)
        assert outcome is None

        documents = obj.data.get('documents', [])
        assert len(documents) == 0
def test_populate_submission_document_does_not_duplicate_documents():
    """Check that running the task twice yields a single document entry.

    The submission URL is mocked to serve the ``1605.03844.pdf`` fixture;
    ``populate_submission_document`` is invoked twice on the same object and
    the resulting ``documents`` list must contain exactly one entry.
    """
    # NOTE(review): another function with this exact name appears later in
    # the visible source. If both live in the same module, the later
    # definition rebinds the name and this test is never collected --
    # confirm which one should remain.
    submission_url = 'http://export.arxiv.org/pdf/1605.03844'

    with requests_mock.Mocker() as requests_mocker:
        # Serve the local fixture instead of reaching the real host.
        fixture_path = os.path.join('fixtures', '1605.03844.pdf')
        requests_mocker.register_uri(
            'GET',
            submission_url,
            content=pkg_resources.resource_string(__name__, fixture_path),
        )

        subschema = load_schema('hep')['properties']['acquisition_source']

        acquisition_source = {
            'datetime': '2017-11-30T16:38:43.352370',
            'email': '*****@*****.**',
            'internal_uid': 54252,
            'method': 'submitter',
            'orcid': '0000-0002-2174-4493',
            'source': 'submitter',
            'submission_number': '1',
        }
        data = {'acquisition_source': acquisition_source}
        extra_data = {'submission_pdf': submission_url}
        files = MockFiles({})

        # Sanity check: the input must be valid against the schema.
        assert validate(acquisition_source, subschema) is None

        obj = MockObj(data, extra_data, files=files)
        eng = MockEng()

        # Two consecutive runs: the second must not append another entry.
        first_outcome = populate_submission_document(obj, eng)
        assert first_outcome is None
        second_outcome = populate_submission_document(obj, eng)
        assert second_outcome is None

        expected = [
            {
                'fulltext': True,
                'key': 'fulltext.pdf',
                'original_url': submission_url,
                'source': 'submitter',
                'url': submission_url,
            },
        ]
        result = obj.data['documents']

        assert expected == result
def test_populate_submission_document_does_not_duplicate_documents():
    """Check that running the task twice yields a single document entry.

    ``populate_submission_document`` is invoked twice on the same object and
    the resulting ``documents`` list must contain exactly one entry pointing
    at the submission URL.
    """
    # NOTE(review): another function with this exact name appears earlier in
    # the visible source. If both live in the same module, this definition
    # rebinds the name and the earlier test is never collected -- confirm
    # which one should remain.
    # NOTE(review): unlike the earlier variant, no HTTP mock is installed
    # here; confirm the function under test performs no network access.
    submission_url = 'http://export.arxiv.org/pdf/1605.03844'

    subschema = load_schema('hep')['properties']['acquisition_source']

    acquisition_source = {
        'datetime': '2017-11-30T16:38:43.352370',
        'email': '*****@*****.**',
        'internal_uid': 54252,
        'method': 'submitter',
        'orcid': '0000-0002-2174-4493',
        'source': 'submitter',
        'submission_number': '1',
    }
    data = {'acquisition_source': acquisition_source}
    # Sanity check: the input must be valid against the schema.
    assert validate(acquisition_source, subschema) is None

    extra_data = {'submission_pdf': submission_url}
    files = MockFiles({})

    obj = MockObj(data, extra_data, files=files)
    eng = MockEng()

    # Two consecutive runs: the second must not append another entry.
    first_outcome = populate_submission_document(obj, eng)
    assert first_outcome is None
    second_outcome = populate_submission_document(obj, eng)
    assert second_outcome is None

    expected_key = 'fulltext.pdf'
    expected_documents = [
        {
            'fulltext': True,
            'key': expected_key,
            'original_url': submission_url,
            'source': 'submitter',
            'url': submission_url,
        },
    ]
    documents = obj.data['documents']

    assert expected_documents == documents