示例#1
0
def test_search_export():
    """Check ``search_export`` against a stored Elasticsearch response.

    ``current_search_client`` and ``current_app`` of
    ``scoap3.modules.tools.tools`` are patched with mocks, then the ``data``
    entry of the result of ``search_export(None)`` is compared with the
    ``tools/search_expected.json`` fixture.
    """
    expected = read_response_as_json('tools', 'search_expected.json')

    index_name = 'scoap3-records-record'
    es_response = read_response_as_json('elasticsearch', 'tool_search.json')
    mocked_es = MockES(es_response, with_assert=False)

    # Three-element entries, passed through unchanged as the
    # SEARCH_EXPORT_FIELDS setting of the mocked application.
    export_fields = (
        ('Publication year', 'year', 'year'),
        ('Control number', 'control_number', 'control_number'),
        ('DOI', 'dois', 'dois[0].value'),
        ('Title', 'titles', 'titles[0].title'),
        ('arXiv id', 'arxiv_eprints', 'arxiv_eprints[0].value'),
        ('arXiv primary category', 'arxiv_eprints', 'arxiv_eprints[0].categories[0]'),
        ('Publication date', 'imprints', 'imprints[0].date'),
        ('Record creation date', 'record_creation_date', 'record_creation_date'),
        ('Journal', 'publication_info', 'publication_info[0].journal_title'),
    )
    app_config = {
        'SEARCH_UI_SEARCH_INDEX': index_name,
        'SEARCH_EXPORT_FIELDS': export_fields,
    }

    with patch('scoap3.modules.tools.tools.current_search_client', mocked_es), \
            patch('scoap3.modules.tools.tools.current_app', MockApp(app_config)):
        exported = search_export(None)
        assert exported['data'] == expected
示例#2
0
def base_test_tool(export_function, country):
    """Run an export tool with the search client and application mocked.

    :param export_function: callable invoked as
        ``export_function(country=country)``.
    :param country: value interpolated into the ``country:<value>`` query
        handed to the mock; a falsy value results in ``q=None``.
    :return: the return value of ``export_function``.
    """
    index_name = 'scoap3-records-record'
    es_response = read_response_as_json('elasticsearch', 'tool_affiliations.json')

    if country:
        query = 'country:%s' % country
    else:
        query = None

    source_fields = [
        'publication_info.year',
        'publication_info.journal_title',
        'arxiv_eprints',
        'dois',
        'authors',
        'control_number',
    ]

    # Search arguments given to the mock along with the canned response.
    mocked_es = MockES(
        es_response,
        q=query,
        index=index_name,
        _source=source_fields,
        size=100,
        from_=0)

    app_config = {'SEARCH_UI_SEARCH_INDEX': index_name}

    with patch('scoap3.modules.tools.tools.current_search_client', mocked_es), \
            patch('scoap3.modules.tools.tools.current_app', MockApp(app_config)):
        return export_function(country=country)
示例#3
0
def test_halt_invalid_record():
    """A record missing the ``abstracts`` field must halt the workflow.

    The article fixture is loaded, ``abstracts`` is removed, and the
    article_upload workflow is run with all external HTTP calls mocked. The
    workflow is expected to end HALTED with a validation error message.
    """
    # load the article and drop a field so that validation fails
    article = read_response_as_json('hepcrawl', 'aps2.json')
    article.pop('abstracts')

    aps_url = 'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025'
    # the same APS URL serves different fixtures depending on the Accept header
    aps_fixtures = (
        ('application/pdf', 'harvest.aps.org_PhysRevD.99.075025.pdf'),
        ('text/xml', 'harvest.aps.org_PhysRevD.99.075025.xml'),
    )

    with requests_mock.Mocker() as mocker:
        mocker.get(
            'http://export.arxiv.org/api/query?search_query=id:1808.08188',
            content=read_response(
                'article_upload',
                'export.arxiv.org_api_query_search_query_id_1808.08188'))
        for accept, fixture_name in aps_fixtures:
            mocker.register_uri(
                'GET',
                aps_url,
                request_headers={'Accept': accept},
                content=read_response('article_upload', fixture_name))
        mocker.get(
            'https://api.crossref.org/works/10.1103/PhysRevD.99.075025',
            content=read_response(
                'article_upload',
                'crossref.org_works_10.1103_PhysRevD.99.075025'))

        workflow = run_article_upload_with_data(article, mocker)
        assert workflow.status == WorkflowStatus.HALTED

        message = workflow.objects[0].extra_data['_message']
        assert message.startswith(
            "Validation error: u'abstracts' is a required property")
示例#4
0
def test_invalid_record_update_with_whole_workflow():
    """
    Runs the article_upload workflow on the same article twice.

    For the second run a slightly modified json is passed, which has to fail the validation.
    The first run must complete; the second must halt and leave the control number and the
    title of the record unchanged.
    """
    # Local import: deepcopy is needed only to build the modified payload.
    from copy import deepcopy

    with requests_mock.Mocker() as m:
        m.get('http://export.arxiv.org/api/query?search_query=id:1808.08188',
              content=read_response(
                  'article_upload',
                  'export.arxiv.org_api_query_search_query_id_1808.08188'))
        # the same APS URL is registered twice, distinguished by the Accept header
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'application/pdf'},
            content=read_response('article_upload',
                                  'harvest.aps.org_PhysRevD.99.075025.pdf'))
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'text/xml'},
            content=read_response('article_upload',
                                  'harvest.aps.org_PhysRevD.99.075025.xml'))
        m.get('https://api.crossref.org/works/10.1103/PhysRevD.99.075025',
              content=read_response(
                  'article_upload',
                  'crossref.org_works_10.1103_PhysRevD.99.075025'))

        # read article data from json
        json_data = read_response_as_json('hepcrawl', 'aps2.json')

        # run first workflow
        workflow1 = run_article_upload_with_data(json_data, m)
        record1 = get_record_from_workflow(workflow1)
        assert workflow1.status == WorkflowStatus.COMPLETED

        # update article data
        # A deep copy is required: dict.copy() is shallow, so editing the nested
        # 'titles' entry would also rewrite the title inside json_data.
        updated_json_data = deepcopy(json_data)
        updated_json_data['titles'][0]['title'] = 'Manually updated title'

        # delete a required field to fail the validation
        updated_json_data.pop('abstracts')

        # run second workflow
        workflow2 = run_article_upload_with_data(updated_json_data, m)
        record2 = get_record_from_workflow(workflow2)
        assert workflow2.status == WorkflowStatus.HALTED
        assert workflow2.objects[0].extra_data['_message'].startswith(
            "Validation error: u'abstracts' is a "
            "required property")

        # control number and title should be the same, since the validation failed
        assert record1["control_number"] == record2["control_number"]
        assert record1['titles'][0]['title'] == 'Alternative perspective on gauged lepton number and implications ' \
                                                'for collider physics'
        assert record1['titles'][0]['title'] == record2['titles'][0]['title']
示例#5
0
def test_record_update():
    """
    Runs the article_upload workflow on the same article twice.

    For the second run a slightly modified json is passed, to test if the record will be updated and a new record won't
    be created. Both runs must complete, the control number must stay the same and the title must change.
    """
    # Local import: deepcopy is needed only to build the modified payload.
    from copy import deepcopy

    with requests_mock.Mocker() as m:
        m.get('http://export.arxiv.org/api/query?search_query=id:1808.08188',
              content=read_response(
                  'article_upload',
                  'export.arxiv.org_api_query_search_query_id_1808.08188'))
        # the same APS URL is registered twice, distinguished by the Accept header
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'application/pdf'},
            content=read_response('article_upload',
                                  'harvest.aps.org_PhysRevD.99.075025.pdf'))
        m.register_uri(
            'GET',
            'http://harvest.aps.org/v2/journals/articles/10.1103/PhysRevD.99.075025',
            request_headers={'Accept': 'text/xml'},
            content=read_response('article_upload',
                                  'harvest.aps.org_PhysRevD.99.075025.xml'))
        m.get('https://api.crossref.org/works/10.1103/PhysRevD.99.075025',
              content=read_response(
                  'article_upload',
                  'crossref.org_works_10.1103_PhysRevD.99.075025'))

        # read article data from json
        json_data = read_response_as_json('hepcrawl', 'aps2.json')

        # run first workflow
        workflow1 = run_article_upload_with_data(json_data, m)
        record1 = get_record_from_workflow(workflow1)
        assert workflow1.status == WorkflowStatus.COMPLETED

        # update article data
        # A deep copy is required: dict.copy() is shallow, so editing the nested
        # 'titles' entry would also rewrite the title inside json_data.
        updated_json_data = deepcopy(json_data)
        updated_json_data['titles'][0]['title'] = 'Manually updated title'

        # run second workflow
        workflow2 = run_article_upload_with_data(updated_json_data, m)
        record2 = get_record_from_workflow(workflow2)
        assert workflow2.status == WorkflowStatus.COMPLETED

        # control number should be the same, but article data has to be updated
        assert record1["control_number"] == record2["control_number"]
        assert record1['titles'][0]['title'] == 'Alternative perspective on gauged lepton number and implications ' \
                                                'for collider physics'
        assert record2['titles'][0]['title'] == 'Manually updated title'
示例#6
0
def test_halt_record_without_authors():
    """A record with the ``authors`` field removed must halt the workflow.

    The workflow is expected to end HALTED with the message
    'No authors for article.'.
    """
    # load the article and strip its authors
    article = read_response_as_json('hepcrawl', 'aps2.json')
    article.pop('authors')

    with requests_mock.Mocker() as mocker:
        arxiv_response = read_response(
            'article_upload',
            'export.arxiv.org_api_query_search_query_id_1808.08188')
        mocker.get(
            'http://export.arxiv.org/api/query?search_query=id:1808.08188',
            content=arxiv_response)

        workflow = run_article_upload_with_data(article, mocker)
        assert workflow.status == WorkflowStatus.HALTED

        message = workflow.objects[0].extra_data['_message']
        assert message == 'No authors for article.'
示例#7
0
def test_halt_record_without_affiliations():
    """A record whose authors have no affiliations must halt the workflow.

    Every author of the fixture loses its ``affiliations`` entry before the
    article_upload workflow is run. The workflow is expected to end HALTED
    with a message naming an author without affiliations.
    """
    # load the article; the fixture must contain at least one author
    article = read_response_as_json('hepcrawl', 'aps2.json')
    authors = article.get('authors', ())
    assert len(authors) > 0

    # strip the affiliations of every author
    for author in authors:
        author.pop('affiliations')

    expected_message = (
        "No affiliations for author: {u'raw_name': u'We-Fu Chang', u'surname': u'Chang', u'given_names': u'We-Fu', "
        "u'full_name': u'Chang, We-Fu'}.")

    with requests_mock.Mocker() as mocker:
        arxiv_response = read_response(
            'article_upload',
            'export.arxiv.org_api_query_search_query_id_1808.08188')
        mocker.get(
            'http://export.arxiv.org/api/query?search_query=id:1808.08188',
            content=arxiv_response)

        workflow = run_article_upload_with_data(article, mocker)
        assert workflow.status == WorkflowStatus.HALTED
        assert workflow.objects[0].extra_data['_message'] == expected_message
示例#8
0
def run_article_upload_with_file(input_json_filename, mock_address):
    """Run the article_upload workflow on a stored hepcrawl response.

    :param input_json_filename: name of the fixture file read from the
        'hepcrawl' responses.
    :param mock_address: passed unchanged to ``run_article_upload_with_data``.
    :return: the Workflow object produced by ``run_article_upload_with_data``.
    """
    return run_article_upload_with_data(
        read_response_as_json('hepcrawl', input_json_filename),
        mock_address)
示例#9
0
def test_author_export():
    """Compare the ``authors_export`` output, with no country, to its fixture."""
    expected = read_response_as_json('tools', 'authors_expected.json')
    exported = base_test_tool(authors_export, None)
    assert exported['data'] == expected
示例#10
0
def test_affiliation_export_country():
    """Compare the ``affiliations_export`` output for country 'USA' to its fixture."""
    expected = read_response_as_json('tools', 'affiliation_expected_us.json')
    exported = base_test_tool(affiliations_export, 'USA')
    assert exported['data'] == expected