Пример #1
0
def arxiv_derive_inspire_categories(obj, eng):
    """Fill ``inspire_categories`` in ``obj.data`` from its arXiv categories.

    Works purely by side effect. Every arXiv category returned by
    ``get_arxiv_categories`` for ``obj.data`` is passed to
    ``classify_field``; each truthy result is stored under the
    ``inspire_categories`` key as ``{'source': 'arxiv', 'term': <result>}``,
    unless an equal entry is already present.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None

    """
    # Make sure the target list exists before anything is appended to it.
    obj.data.setdefault('inspire_categories', [])

    for category in get_arxiv_categories(obj.data):
        mapped_term = classify_field(category)
        if not mapped_term:
            # ``classify_field`` produced nothing usable for this category.
            continue

        entry = {
            'source': 'arxiv',
            'term': mapped_term,
        }
        if entry in obj.data['inspire_categories']:
            # Already recorded; do not add a duplicate.
            continue

        obj.data['inspire_categories'].append(entry)
Пример #2
0
def arxiv_derive_inspire_categories(obj, eng):
    """Derive ``inspire_categories`` from the arXiv categories.

    Mutates ``obj.data`` in place: each arXiv category of the record is
    converted with ``classify_field`` and, when the conversion yields a
    truthy term, an ``{'source': 'arxiv', 'term': term}`` entry is added to
    the ``inspire_categories`` list if it is not already in it.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None

    """
    obj.data.setdefault('inspire_categories', [])

    # Lazily converted terms: each category is classified right before its
    # entry is considered, one at a time.
    terms = (
        classify_field(category)
        for category in get_arxiv_categories(obj.data)
    )

    for term in terms:
        candidate = {'source': 'arxiv', 'term': term}
        if term and candidate not in obj.data['inspire_categories']:
            obj.data['inspire_categories'].append(candidate)
Пример #3
0
def is_being_harvested_on_legacy(record):
    """Tell whether the record is already being harvested on Legacy.

    This is the case when at least one arXiv category of the record appears
    in the ``ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY`` application
    setting (an absent setting is treated as an empty list).
    """
    record_categories = get_arxiv_categories(record)
    harvested_on_legacy = current_app.config.get(
        'ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY', [])

    shared = set(record_categories) & set(harvested_on_legacy)
    return bool(shared)
Пример #4
0
def is_being_harvested_on_legacy(record):
    """Return True if the record is being harvested on Legacy.

    The record's arXiv categories are compared with the categories listed
    in the ``ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY`` setting, which
    defaults to an empty list; any category in common means the record is
    already harvested there.
    """
    categories = get_arxiv_categories(record)
    on_legacy = current_app.config.get(
        'ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY', [])

    # "Not disjoint" is the same as "the intersection is non-empty".
    return not set(categories).isdisjoint(set(on_legacy))
Пример #5
0
def has_fully_harvested_category(record):
    """Check if the record belongs to a fully harvested arXiv category.

    The fully harvested categories are the union of the ``core`` and
    ``non-core`` entries of the ``ARXIV_CATEGORIES`` application setting.
    A missing setting, or a missing/empty entry, contributes no categories
    instead of raising ``TypeError``.

    Arguments:
        record(dict): the ingested article.

    Return:
        bool: True when the record belongs to an arXiv category that is fully
        harvested, otherwise False.
    """
    record_categories = set(get_arxiv_categories(record))
    harvested_categories = current_app.config.get('ARXIV_CATEGORIES', {})

    # ``or ()`` covers both an absent key and a key explicitly set to None,
    # so the ``{}`` default above really is safe to use.
    fully_harvested = set(harvested_categories.get('core') or ())
    fully_harvested.update(harvested_categories.get('non-core') or ())

    return bool(record_categories & fully_harvested)
Пример #6
0
def get_coreness(record):
    """Return the coreness of the record harvested.

    The record's arXiv categories are matched against the ``core`` and
    ``non-core`` entries of the ``ARXIV_CATEGORIES`` application setting,
    ``core`` taking precedence. A missing setting or entry is treated as
    an empty list of categories, so such a record is non-relevant.

    Args:
        record(dict): the record harvested.

    Return:
        Coreness: ``Coreness.core``, ``Coreness.non_core`` or
        ``Coreness.non_relevant`` according to the record's coreness level.
    """
    arxiv_categories = set(get_arxiv_categories(record))
    # The setting is a mapping, so the fallback must be a mapping as well:
    # a list default cannot be looked up by key.
    relevant_categories = current_app.config.get('ARXIV_CATEGORIES', {})

    if arxiv_categories & set(relevant_categories.get('core') or ()):
        return Coreness.core
    if arxiv_categories & set(relevant_categories.get('non-core') or ()):
        return Coreness.non_core
    return Coreness.non_relevant
Пример #7
0
def has_fully_harvested_category(record):
    """Check if the record has at least one fully harvested arXiv category.

    Fully harvested categories come from the ``core`` and ``non-core``
    entries of the ``ARXIV_CATEGORIES`` application setting. When the
    setting or one of its entries is absent (or None) it is treated as
    empty, rather than failing on ``None + None``.

    Arguments:
        record(dict): the ingested article.

    Return:
        bool: True when the record belongs to an arXiv category that is fully
        harvested, otherwise False.
    """
    record_categories = set(get_arxiv_categories(record))
    harvested_categories = current_app.config.get('ARXIV_CATEGORIES', {})

    # Test each entry separately so nothing has to be concatenated; a
    # missing or None entry simply never matches.
    return any(
        record_categories.intersection(harvested_categories.get(key) or ())
        for key in ('core', 'non-core')
    )
Пример #8
0
def test_get_arxiv_categories_returns_all_arxiv_categories():
    """``get_arxiv_categories`` returns the categories of a valid record."""
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    eprint = {
        'categories': ['nucl-th'],
        'value': '1605.03898',
    }
    record = {'arxiv_eprints': [eprint]}  # literature/1458300

    # The fixture must be valid against the ``arxiv_eprints`` subschema.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    expected = ['nucl-th']
    result = get_arxiv_categories(record)

    assert expected == result
Пример #9
0
def is_experimental_paper(obj, eng):
    """Check if a workflow contains an experimental paper.

    The paper counts as experimental when one of its arXiv categories is in
    ``EXPERIMENTAL_ARXIV_CATEGORIES`` or one of its INSPIRE categories is in
    ``EXPERIMENTAL_INSPIRE_CATEGORIES``.

    Args:
        obj: a workflow object.
        eng: a workflow engine (not used here).

    Returns:
        bool: whether the workflow contains an experimental paper.

    """
    record_arxiv = get_arxiv_categories(obj.data)
    record_inspire = get_inspire_categories(obj.data)

    arxiv_match = bool(
        set(record_arxiv) & set(EXPERIMENTAL_ARXIV_CATEGORIES))
    inspire_match = bool(
        set(record_inspire) & set(EXPERIMENTAL_INSPIRE_CATEGORIES))

    return arxiv_match or inspire_match
Пример #10
0
def test_get_arxiv_categories_returns_all_arxiv_categories():
    """A record with a single arXiv category yields exactly that category."""
    hep_schema = load_schema('hep')
    arxiv_eprints_schema = hep_schema['properties']['arxiv_eprints']

    arxiv_eprints = [
        {'categories': ['nucl-th'], 'value': '1605.03898'},
    ]
    record = {'arxiv_eprints': arxiv_eprints}  # literature/1458300

    # Guard against the fixture drifting away from the schema.
    assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None

    expected = ['nucl-th']
    result = get_arxiv_categories(record)
    assert expected == result
Пример #11
0
def test_get_arxiv_categories():
    """All categories of an eprint are returned, in their original order."""
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    eprint = {
        'categories': ['hep-th', 'hep-ph'],
        'value': '1612.08928',
    }
    record = {'arxiv_eprints': [eprint]}

    # The fixture must be valid against the ``arxiv_eprints`` subschema.
    assert validate(record['arxiv_eprints'], eprints_schema) is None

    expected = ['hep-th', 'hep-ph']
    result = get_arxiv_categories(record)

    assert expected == result
Пример #12
0
 def _is_core(record):
     """Return the record's arXiv categories that are configured as core.

     Core categories are read from the ``core`` entry of the
     ``ARXIV_CATEGORIES`` application setting; a missing setting or entry
     is treated as empty instead of raising ``TypeError``.

     Args:
         record(dict): the record to inspect.

     Returns:
         set: the arXiv categories shared by the record and the ``core``
         entry; empty (falsy) when the record is not core.
     """
     record_categories = set(get_arxiv_categories(record))
     core_categories = current_app.config.get(
         'ARXIV_CATEGORIES', {}).get('core')
     # ``or ()`` keeps ``set()`` from being called on None.
     return record_categories & set(core_categories or ())
Пример #13
0
def physics_data_an_is_primary_category(record):
    """Tell whether the record's first arXiv category is ``physics.data-an``.

    Returns False when the record has no arXiv categories at all.
    """
    categories = get_arxiv_categories(record)
    if not categories:
        return False
    return categories[0] == 'physics.data-an'
Пример #14
0
def jlab_ticket_needed(obj, eng):
    """Check if a JLab curation ticket is needed.

    A ticket is needed when at least one arXiv category of ``obj.data`` is
    listed in the ``JLAB_ARXIV_CATEGORIES`` application setting.

    Args:
        obj: a workflow object.
        eng: a workflow engine (not used here).

    Returns:
        bool: whether the record has a JLab arXiv category.
    """
    watched_categories = set(current_app.config['JLAB_ARXIV_CATEGORIES'])
    record_categories = set(get_arxiv_categories(obj.data))
    # "Not disjoint" is the same as "the intersection is non-empty".
    return not watched_categories.isdisjoint(record_categories)
Пример #15
0
 def _is_core(record):
     """Return the arXiv categories of the record that are core.

     The ``core`` entry of the ``ARXIV_CATEGORIES`` application setting
     lists the core categories. If the setting or the entry is absent (or
     None) no category is considered core, rather than failing on
     ``set(None)``.

     Args:
         record(dict): the record to inspect.

     Returns:
         set: intersection of the record's arXiv categories with the
         ``core`` entry; an empty set means the record is not core.
     """
     record_categories = set(get_arxiv_categories(record))
     arxiv_config = current_app.config.get('ARXIV_CATEGORIES', {})
     return record_categories.intersection(arxiv_config.get('core') or ())
Пример #16
0
def physics_data_an_is_primary_category(record):
    """Return whether ``physics.data-an`` is the first arXiv category.

    A record without arXiv categories gives False.
    """
    categories = get_arxiv_categories(record)
    return categories[0] == 'physics.data-an' if categories else False