Exemplos de match_reference em Python, exemplos de inspirehep.modules.workflows.tasks.refextract.match_reference em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_workflows_tasks_refextract.py Projeto: reve99/inspire-next

def test_match_reference_for_data_config():
    """Test reference matcher for the data configuration.

    Indexes a record of the ``Data`` collection carrying a DOI, then
    checks that a reference citing that same DOI is linked to the
    record through the data API endpoint.
    """
    doi = '10.5281/zenodo.11020'

    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/data.json',
        '_collections': ['Data'],
        'control_number': 1,
        'dois': [{'value': doi}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=record_json,
        index_name='records-data',
        pid_type='dat',
    )

    citation = {
        'reference': {
            'dois': [doi],
            'publication_info': {'year': 2007},
        },
    }

    matched = match_reference(citation)

    expected_ref = 'http://localhost:5000/api/data/1'
    assert matched['record']['$ref'] == expected_ref

Exemplo n.º 2

0

Exibir arquivo

Arquivo: test_workflows_tasks_refextract.py Projeto: reve99/inspire-next

def test_match_reference_on_texkey():
    """Test that a reference is matched to a record through its texkey.

    The reference is validated against the ``references`` subschema of
    the ``hep`` schema both before and after the matching step.
    """
    texkey = 'Giudice:2007fh'

    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        'texkeys': [texkey],
        'titles': [{'title': 'The Strongly-Interacting Light Higgs'}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=record_json,
        index_name='records-hep',
    )

    citation = {'reference': {'texkey': texkey}}

    references_schema = load_schema('hep')['properties']['references']

    # The input must already be schema-valid before it is matched.
    assert validate([citation], references_schema) is None

    matched = match_reference(citation)

    expected_ref = 'http://localhost:5000/api/literature/1'
    assert matched['record']['$ref'] == expected_ref
    # Matching must not break schema validity.
    assert validate([matched], references_schema) is None

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_workflows_tasks_refextract.py Projeto: reve99/inspire-next

def test_match_reference_ignores_deleted():
    """Test that a reference is not linked to a record marked as deleted.

    The only indexed record sharing the reference's DOI has
    ``deleted`` set to ``True``, so no ``record`` key is expected in
    the matched reference.
    """
    doi = '10.1371/journal.pone.0188398'

    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        'deleted': True,
        'dois': [{'value': doi}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=record_json,
        index_name='records-hep',
    )

    citation = {'reference': {'dois': [doi]}}

    references_schema = load_schema('hep')['properties']['references']

    # The input must already be schema-valid before it is matched.
    assert validate([citation], references_schema) is None

    matched = match_reference(citation)

    assert 'record' not in matched

Exemplo n.º 4

0

Exibir arquivo

Arquivo: actions.py Projeto: aidansedgewick/inspire-next

def refextract(obj, eng):
    """Extract references for a workflow object and attach them to it.

    When the workflow data already carries ``references``, they are
    re-extracted from their raw form, matched, deduplicated and stored
    back; nothing else is attempted.

    Otherwise references are extracted both from the document attached
    to the workflow (if there is one) and from the text found under
    ``formdata.references`` in the extra data (if any). Each set is
    matched and deduplicated, and the larger one is stored; on a tie
    the references coming from the text are kept.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    def _match_all(refs):
        # Run the matcher over every extracted reference.
        return [match_reference(ref) for ref in refs]

    if 'references' in obj.data:
        matched_raw = _match_all(
            extract_references_from_raw_refs(obj.data['references'])
        )
        # The logged count is taken before deduplication.
        obj.log.info('Extracted %d references from raw refs.', len(matched_raw))
        obj.data['references'] = dedupe_list(matched_raw)
        return

    pdf_references = []
    text_references = []
    source = get_source(obj.data)

    with get_document_in_workflow(obj) as tmp_document:
        if tmp_document:
            pdf_references = dedupe_list(
                _match_all(extract_references_from_pdf(tmp_document, source))
            )

    text = get_value(obj.extra_data, 'formdata.references')
    if text:
        text_references = dedupe_list(
            _match_all(extract_references_from_text(text, source))
        )

    pdf_count = len(pdf_references)
    text_count = len(text_references)

    if pdf_count == 0 and text_count == 0:
        obj.log.info('No references extracted.')
        return

    if pdf_count > text_count:
        obj.log.info('Extracted %d references from PDF.', pdf_count)
        obj.data['references'] = pdf_references
    else:
        # Text references win whenever they are at least as numerous.
        obj.log.info('Extracted %d references from text.', text_count)
        obj.data['references'] = text_references

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test_workflows_tasks_refextract.py Projeto: reve99/inspire-next

def test_match_reference_for_jcap_and_jhep_config():
    """Test reference matcher for the JCAP and JHEP configuration.

    Indexes a JHEP article and checks that a reference carrying the
    same publication info (article id, journal title, volume, start
    page and year) is linked to it, staying schema-valid throughout.
    """
    publication_info = {
        'artid': '045',
        'journal_title': 'JHEP',
        'journal_volume': '06',
        'page_start': '045',
        'year': 2007,
    }

    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        # Separate copies so the record and the reference never share a dict.
        'publication_info': [dict(publication_info)],
        'titles': [{'title': 'The Strongly-Interacting Light Higgs'}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=record_json,
        index_name='records-hep',
    )

    citation = {
        'reference': {
            'publication_info': dict(publication_info),
        },
    }

    references_schema = load_schema('hep')['properties']['references']

    # The input must already be schema-valid before it is matched.
    assert validate([citation], references_schema) is None

    matched = match_reference(citation)

    expected_ref = 'http://localhost:5000/api/literature/1'
    assert matched['record']['$ref'] == expected_ref
    # Matching must not break schema validity.
    assert validate([matched], references_schema) is None