Python match_reference примеры использования

Язык программирования: Python

Пространство имен/Пакет: inspirehep.modules.workflows.tasks.refextract

Метод/Функция: match_reference

Примеров на hotexamples.com: 5

Python match_reference - 5 примеров найдено. Это лучшие примеры Python кода для inspirehep.modules.workflows.tasks.refextract.match_reference, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: test_workflows_tasks_refextract.py Проект: reve99/inspire-next

def test_match_reference_for_data_config():
    """Test that a reference is matched to a data record through its DOI.

    A record from the ``Data`` collection is created in the
    ``records-data`` index with pid type ``dat``; a reference carrying the
    same DOI is then expected to be linked to that record by
    ``match_reference``.
    """

    cited_record_json = {
        '$schema': 'http://localhost:5000/schemas/records/data.json',
        '_collections': ['Data'],
        'control_number': 1,
        'dois': [{
            'value': '10.5281/zenodo.11020'
        }],
    }

    # Store the cited record so that the matcher has something to find.
    TestRecordMetadata.create_from_kwargs(json=cited_record_json,
                                          index_name='records-data',
                                          pid_type='dat')

    # The reference shares its DOI with the record created above.
    reference = {
        'reference': {
            'dois': ['10.5281/zenodo.11020'],
            'publication_info': {
                'year': 2007
            }
        }
    }

    reference = match_reference(reference)

    # The matched reference must point at the data record's API URL.
    assert reference['record']['$ref'] == 'http://localhost:5000/api/data/1'

Пример #2

Показать файл

Файл: test_workflows_tasks_refextract.py Проект: reve99/inspire-next

def test_match_reference_on_texkey():
    """Check that a reference is linked to a literature record via its texkey.

    The reference must validate against the ``references`` subschema of the
    ``hep`` schema both before and after matching.
    """
    texkey_record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        'texkeys': ['Giudice:2007fh'],
        'titles': [{'title': 'The Strongly-Interacting Light Higgs'}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=texkey_record,
        index_name='records-hep',
    )

    unmatched = {'reference': {'texkey': 'Giudice:2007fh'}}

    references_schema = load_schema('hep')['properties']['references']

    # The input has to be schema-valid before matching is attempted.
    assert validate([unmatched], references_schema) is None

    matched = match_reference(unmatched)

    expected_ref = 'http://localhost:5000/api/literature/1'
    assert matched['record']['$ref'] == expected_ref
    # Matching must not break schema validity.
    assert validate([matched], references_schema) is None

Пример #3

Показать файл

Файл: test_workflows_tasks_refextract.py Проект: reve99/inspire-next

def test_match_reference_ignores_deleted():
    """Check that a record flagged as deleted is never used as a match.

    The reference shares its DOI with the only stored record, but that
    record has ``deleted`` set, so no ``record`` key may be added.
    """
    doi = '10.1371/journal.pone.0188398'

    deleted_record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        'deleted': True,
        'dois': [{'value': doi}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=deleted_record,
        index_name='records-hep',
    )

    unmatched = {'reference': {'dois': [doi]}}

    references_schema = load_schema('hep')['properties']['references']
    assert validate([unmatched], references_schema) is None

    result = match_reference(unmatched)

    # A deleted record must not be attached to the reference.
    assert 'record' not in result

Пример #4

Показать файл

Файл: actions.py Проект: aidansedgewick/inspire-next

def refextract(obj, eng):
    """Extract references from various sources and add them to the workflow.

    If the workflow data already contains ``references``, they are
    re-extracted from their raw refs, matched, deduplicated and stored back
    in ``obj.data['references']``; nothing else is attempted in that case.

    Otherwise references are extracted from the document attached to the
    workflow (if any) and from the references provided by the submitter
    under ``formdata.references`` in ``obj.extra_data`` (if any). Both
    lists are matched and deduplicated, and the longer one is stored in
    ``obj.data['references']``. On a tie between non-empty lists, the
    text references are kept. When neither source yields any reference,
    ``obj.data`` is left untouched.

    Args:
        obj: a workflow object.
        eng: a workflow engine (not used by this task).

    Returns:
        None

    """
    if 'references' in obj.data:
        extracted_raw_references = extract_references_from_raw_refs(obj.data['references'])
        extracted_raw_references = [match_reference(ref) for ref in extracted_raw_references]
        # The logged count is taken before deduplication.
        obj.log.info('Extracted %d references from raw refs.', len(extracted_raw_references))
        obj.data['references'] = dedupe_list(extracted_raw_references)
        return

    pdf_references, text_references = [], []
    source = get_source(obj.data)

    with get_document_in_workflow(obj) as tmp_document:
        # The context manager may yield a falsy value; skip the PDF then.
        if tmp_document:
            pdf_references = extract_references_from_pdf(tmp_document, source)
            pdf_references = [match_reference(ref) for ref in pdf_references]
            pdf_references = dedupe_list(pdf_references)

    text = get_value(obj.extra_data, 'formdata.references')
    if text:
        text_references = extract_references_from_text(text, source)
        text_references = [match_reference(ref) for ref in text_references]
        text_references = dedupe_list(text_references)

    pdf_count = len(pdf_references)
    text_count = len(text_references)

    if pdf_count == 0 and text_count == 0:
        obj.log.info('No references extracted.')
    elif pdf_count > text_count:
        obj.log.info('Extracted %d references from PDF.', pdf_count)
        obj.data['references'] = pdf_references
    else:
        # Here text_count >= pdf_count and at least one list is non-empty,
        # so the text references win, including on a tie.
        obj.log.info('Extracted %d references from text.', text_count)
        obj.data['references'] = text_references

Пример #5

Показать файл

Файл: test_workflows_tasks_refextract.py Проект: reve99/inspire-next

def test_match_reference_for_jcap_and_jhep_config():
    """Check reference matching on publication info for a JHEP article.

    The reference carries the same ``publication_info`` values as the stored
    literature record and must be linked to it, staying schema-valid before
    and after matching.
    """
    jhep_record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        'publication_info': [{
            'artid': '045',
            'journal_title': 'JHEP',
            'journal_volume': '06',
            'page_start': '045',
            'year': 2007,
        }],
        'titles': [{'title': 'The Strongly-Interacting Light Higgs'}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=jhep_record,
        index_name='records-hep',
    )

    unmatched = {
        'reference': {
            'publication_info': {
                'artid': '045',
                'journal_title': 'JHEP',
                'journal_volume': '06',
                'page_start': '045',
                'year': 2007,
            },
        },
    }

    references_schema = load_schema('hep')['properties']['references']

    # The input has to be schema-valid before matching is attempted.
    assert validate([unmatched], references_schema) is None

    matched = match_reference(unmatched)

    expected_ref = 'http://localhost:5000/api/literature/1'
    assert matched['record']['$ref'] == expected_ref
    # Matching must not break schema validity.
    assert validate([matched], references_schema) is None