Python classify_field示例，inspire_schemas.utils.classify_field Python示例

示例#1

0

显示文件

文件： arxiv.py 项目： ksachs/inspire-next

def arxiv_derive_inspire_categories(obj, eng):
    """Fill ``inspire_categories`` in ``obj.data`` from its arXiv categories.

    Every arXiv category found in ``obj.data`` is passed through
    ``classify_field``; each truthy result is stored as an entry with
    source ``arxiv`` unless an identical entry is already present.
    ``obj.data`` is modified in place.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None

    """
    categories = obj.data.setdefault('inspire_categories', [])

    for raw_category in get_arxiv_categories(obj.data):
        normalized = classify_field(raw_category)
        if not normalized:
            # Nothing usable came back for this category; skip it.
            continue

        entry = {'source': 'arxiv', 'term': normalized}
        if entry not in categories:
            categories.append(entry)

示例#2

0

显示文件

文件： arxiv.py 项目： david-caro/inspire-next

def arxiv_derive_inspire_categories(obj, eng):
    """Convert the record's arXiv categories into ``inspire_categories``.

    Works through side effects: the ``inspire_categories`` list inside
    ``obj.data`` is created when missing and then extended with one
    ``{'source': 'arxiv', 'term': ...}`` entry per arXiv category for
    which ``classify_field`` returns a truthy term. Entries that are
    already in the list are not added again.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None

    """
    existing = obj.data.setdefault('inspire_categories', [])

    for category in get_arxiv_categories(obj.data):
        classified = classify_field(category)
        if not classified:
            continue

        candidate = {'source': 'arxiv', 'term': classified}
        if candidate in existing:
            continue
        existing.append(candidate)

示例#3

0

显示文件

def inspire_categories(self, key, value):
    """Build the ``inspire_categories`` list from one input field.

    Reads the scheme from ``value['2']``, the source from ``value['9']``
    and the terms from ``value['a']``. Fields whose scheme is ``arXiv``
    are skipped. Sources not listed in the ``elements/inspire_field``
    schema enum are remapped through a small table of known legacy
    values, or set to ``None`` when unknown. Each term is run through
    ``classify_field`` and appended only when the result is truthy.

    Args:
        key: the key of the field being processed (not used here).
        value: mapping holding the ``2``, ``9`` and ``a`` entries.

    Returns:
        list: the categories already stored on ``self`` plus the new ones.
    """
    field_schema = load_schema('elements/inspire_field')
    allowed_sources = field_schema['properties']['source']['enum']

    collected = self.get('inspire_categories', [])

    # XXX: arXiv categories are skipped here because they are going to
    # be added later in a filter.
    if force_single_element(value.get('2')) == 'arXiv':
        return collected

    # Known legacy source spellings and the schema value each maps to;
    # anything else that is not valid falls back to ``None``.
    legacy_sources = {
        'automatically added based on dcc, ppf, dk': 'curator',
        'submitter': 'user',
    }
    source = force_single_element(value.get('9', '')).lower()
    if source not in allowed_sources:
        source = legacy_sources.get(source)

    for raw_term in force_list(value.get('a')):
        normalized = classify_field(raw_term)
        if normalized:
            collected.append({'term': normalized, 'source': source})

    return collected

示例#4

0

显示文件

文件： arxiv.py 项目： zanachka/hepcrawl

    def parse(self):
        """Extract an arXiv record into an Inspire HEP record.

        Feeds every extracted field of this parser (abstract, title,
        licenses, authors, page count, publication info, collaborations,
        DOIs, preprint date, notes, report numbers, arXiv eprint,
        document type and INSPIRE categories) into ``self.builder``.

        Returns:
            dict: the same record in the Inspire Literature schema.
        """
        self.builder.add_abstract(abstract=self.abstract, source=self.source)
        self.builder.add_title(title=self.title, source=self.source)
        # Named ``license_kwargs`` so the ``license`` builtin is not shadowed.
        for license_kwargs in self.licenses:
            self.builder.add_license(**license_kwargs)
        for author in self.authors:
            self.builder.add_author(author)
        self.builder.add_number_of_pages(self.number_of_pages)
        self.builder.add_publication_info(**self.publication_info)
        for collab in self.collaborations:
            self.builder.add_collaboration(collab)
        for doi in self.dois:
            self.builder.add_doi(**doi)
        self.builder.add_preprint_date(self.preprint_date)
        if self.public_note:
            self.builder.add_public_note(self.public_note, self.source)
        for rep_number in self.report_numbers:
            self.builder.add_report_number(rep_number, self.source)
        self.builder.add_arxiv_eprint(self.arxiv_eprint, self.arxiv_categories)
        self.builder.add_private_note(self.private_note)
        self.builder.add_document_type(self.document_type)
        # ``classify_field`` can return ``None`` (e.g. for empty or
        # non-string input); drop such results so that no empty term is
        # handed to the builder.
        normalized_categories = [
            term
            for term in (
                classify_field(arxiv_cat) for arxiv_cat in self.arxiv_categories
            )
            if term
        ]
        self.builder.add_inspire_categories(dedupe_list(normalized_categories),
                                            'arxiv')

        return self.builder.record

示例#5

0

显示文件

文件： test_utils.py 项目： pazembrz/inspire-schemas

def test_classify_field_ignores_case():
    """An upper-cased arXiv category is still classified."""
    assert utils.classify_field('ASTRO-PH.CO') == 'Astrophysics'

示例#6

0

显示文件

文件： test_utils.py 项目： pazembrz/inspire-schemas

def test_classify_field_returns_other_if_category_not_found():
    """A category that is not found is classified as ``Other``."""
    assert utils.classify_field('quant-bio') == 'Other'

示例#7

0

显示文件

文件： test_utils.py 项目： pazembrz/inspire-schemas

def test_classify_field_normalizes_arxiv_category():
    """The arXiv category is normalized before it is classified."""
    assert utils.classify_field('math-dg') == 'Math and Math Physics'

示例#8

0

显示文件

文件： test_utils.py 项目： pazembrz/inspire-schemas

def test_classify_field_returns_category_for_inspire_category():
    """A value that is already an INSPIRE category comes back unchanged."""
    category = 'Astrophysics'

    assert utils.classify_field(category) == category

示例#9

0

显示文件

文件： test_utils.py 项目： pazembrz/inspire-schemas

def test_classify_field_returns_category_for_arxiv_category():
    """An arXiv category is mapped to its INSPIRE category."""
    assert utils.classify_field('math.AG') == 'Math and Math Physics'

示例#10

0

显示文件

文件： test_utils.py 项目： pazembrz/inspire-schemas

def test_classify_field_returns_none_on_non_string_value():
    """A non-string input yields ``None``."""
    result = utils.classify_field(0)

    assert result is None

示例#11

0

显示文件

文件： test_utils.py 项目： pazembrz/inspire-schemas

def test_classify_field_returns_none_on_falsy_value():
    """An empty string yields ``None``."""
    result = utils.classify_field('')

    assert result is None