示例#1
0
def test_comparing_publication_info_with_cnum():
    """Check that an update adding ``conference_record`` to an entry wins.

    Head and update carry one ``publication_info`` entry with the same
    ``artid``, ``cnum``, ``conf_acronym`` and ``year``; only the update has a
    ``conference_record``. The test expects the update as the merge result
    and no conflicts.
    """
    root = {}
    head = {
        'publication_info': [
            {
                "artid": "WEPAB127",
                "cnum": "C21-05-24.3",
                "conf_acronym": "IPAC2021",
                "year": 2021,
            },
        ],
    }
    update = {
        'publication_info': [
            {
                "artid": "WEPAB127",
                "cnum": "C21-05-24.3",
                "conf_acronym": "IPAC2021",
                "conference_record": {
                    "$ref": "https://inspirehep.net/api/conferences/1853162",
                },
                "year": 2021,
            },
        ],
    }

    # The expectation is the very same object as ``update``.
    expected_merged = update
    expected_conflict = []

    # All inputs and the expectation go through ``add_arxiv_source`` together.
    root, head, update, expected_merged = add_arxiv_source(
        root, head, update, expected_merged
    )
    merged, conflict = merge(root, head, update, head_source='arxiv')
    merged = add_arxiv_source(merged)

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merging_full_name_field_keeps_longest_name():
    """Check that the head's longer ``full_name`` survives the merge.

    Root, head and update each hold a single author whose name is spelled
    differently; the test expects the head back unchanged, with no conflicts.
    """
    root = {'authors': [{'full_name': 'Pitts Kevin'}]}
    head = {'authors': [{'full_name': 'Pitts, Kevin John'}]}
    update = {'authors': [{'full_name': 'Pitts, Kevin'}]}

    expected_conflict = []
    expected_merged = head  # same object as ``head``

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#3
0
def test_comparing_authors_unicode_name():
    """Check that an accented and an unaccented author name are matched.

    The head spells the name with accents, the update without; the test
    expects the head as the merge result and no conflicts.
    """
    root = {}
    head = {'authors': [{'full_name': 'Ortín, Tomás'}]}
    update = {'authors': [{'full_name': 'Ortin, Tomas'}]}

    expected_merged = head  # same object as ``head``
    expected_conflict = []

    # All inputs and the expectation go through ``add_arxiv_source`` together.
    root, head, update, expected_merged = add_arxiv_source(
        root, head, update, expected_merged
    )
    merged, conflict = merge(root, head, update, head_source='arxiv')
    merged = add_arxiv_source(merged)

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#4
0
def merge_records(obj, eng):
    """Manually merge the two records held by a workflow object.

    The records are read from ``obj.extra_data['head']`` and
    ``obj.extra_data['update']`` and merged against an empty ``root``, i.e.
    as if they shared no common ancestor. The merged record replaces
    ``obj.data``, the conflicts are written to
    ``obj.extra_data['conflicts']`` and the object is saved.

    Args:
        obj: a workflow object.
        eng: a workflow engine (not used by this task).

    Returns:
        None

    """
    head = obj.extra_data['head']
    update = obj.extra_data['update']
    head_source = obj.extra_data['head_source']

    result = merge(root={}, head=head, update=update, head_source=head_source)
    merged, conflicts = result

    obj.data = merged
    obj.extra_data['conflicts'] = conflicts
    obj.save()
示例#5
0
def test_merging_acquisition_source_publisher_on_arxiv(fake_get_config):
    """Check that the update's ``acquisition_source`` replaces an arXiv one.

    Root and head carry the same arXiv ``acquisition_source``; the update
    comes from ``"other source"``. The test expects the update as the merge
    result and no conflicts.

    ``fake_get_config`` is not referenced in the body; presumably a pytest
    fixture that patches configuration -- confirm against the test module.
    """
    root = {
        "acquisition_source": {
            "datetime": "2021-05-11T02:35:43.387350",
            "method": "hepcrawl",
            "source": "arXiv",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c0",
        },
    }
    head = {
        "acquisition_source": {
            "datetime": "2021-05-11T02:35:43.387350",
            "method": "hepcrawl",
            "source": "arXiv",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c0",
        },
    }
    update = {
        "acquisition_source": {
            "datetime": "2021-05-12T02:35:43.387350",
            "method": "beard",
            "source": "other source",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c1",
        },
    }

    expected_conflict = []
    expected_merged = update  # same object as ``update``

    merged, conflict = merge(root, head, update)

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#6
0
def merge_article_with_crossref_data(article):
    """Enhance ``article`` with the data imported for its first DOI.

    The original ``article`` is returned untouched when it has no DOI, when
    the import raises ``ImportConnectionError`` or ``ImportParsingError``, or
    when the merged record fails validation against ``"hep"``. Otherwise the
    merged record is returned; merge conflicts are only logged.

    Args:
        article: the record to enhance.

    Returns:
        The merged record, or ``article`` itself on any of the fallbacks above.
    """
    doi = get_value(article, "dois[0].value")
    if not doi:
        # Nothing to look up.
        return article

    try:
        crossref_data = import_doi(doi)
    except (ImportConnectionError, ImportParsingError):
        LOGGER.exception("Cannot merge submission with %r,", doi)
        return article

    # The article is the head; the imported data is applied as the update.
    merged, conflicts = merge(root={}, head=article, update=crossref_data)

    try:
        validate(merged, "hep")
    except ValidationError:
        LOGGER.exception(
            "Merger returned invalid data while merging imported arxiv with crossref",
            doi=doi,
            arxiv=get_value(article, "arxiv_eprints[0].value"),
        )
        return article

    if conflicts:
        LOGGER.debug(
            "Ignoring conflicts while enhancing submission.\n%r", conflicts
        )
    return merged
示例#7
0
def test_merging_same_documents_arxiv_on_arxiv(fake_get_config):
    """Check that merging identical ``documents`` leaves them unchanged.

    Root, head and update are one and the same dict object; the test expects
    that record back with no conflicts.

    ``fake_get_config`` is not referenced in the body; presumably a pytest
    fixture that patches configuration -- confirm against the test module.
    """
    paper = {
        "key": "pdf1.pdf",
        "description": "paper",
        "source": "arXiv",
        "fulltext": True,
        "url": "http://example.com/files/1234-1234-1234-1234/pdf1.pdf",
    }
    latex = {
        "key": "pdf.tex",
        "description": "latex version",
        "source": "arXiv",
        "url": "http://example.com/files/1234-1234-1234-1234/pdf.tex",
    }
    root = {"documents": [paper, latex]}

    # Head and update deliberately alias the root object.
    head = update = root
    expected_merged = head
    expected_conflict = []

    merged, conflict = merge(root, head, update)

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#8
0
def test_merging_cleans_acquisition_source_for_arxiv_on_publisher(
        fake_get_config):
    """Check the merged ``acquisition_source`` source for arXiv on publisher.

    The head's source is ``"desy"`` and the update's is ``"arXiv"``; the test
    only asserts that the merged record's source is ``'arXiv'``.

    ``fake_get_config`` is not referenced in the body; presumably a pytest
    fixture that patches configuration -- confirm against the test module.
    """
    root = {
        "acquisition_source": {
            "datetime": "2021-05-11T02:35:43.387350",
            "method": "arXiv",
            "source": "arXiv",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c0",
        },
    }
    head = {
        "acquisition_source": {
            "datetime": "2021-05-11T02:35:43.387350",
            "method": "hepcrawl",
            "source": "desy",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c0",
        },
    }
    update = {
        "acquisition_source": {
            "datetime": "2021-05-12T02:35:43.387350",
            "method": "hepcrawl",
            "source": "arXiv",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c1",
        },
    }

    # The conflicts are returned but not checked by this test.
    merged, conflict = merge(root, head, update)

    merged_source = merged['acquisition_source']['source']
    assert merged_source == 'arXiv'
def test_merging_inspire_categories_field():
    """Check that the head's ``inspire_categories`` are kept.

    Root, head and update each list different categories; the test expects
    the head back unchanged, with no conflicts.
    """
    root = {
        'inspire_categories': [
            {'source': 'INSPIRE', 'term': 'Theory-HEP'},
        ],
    }
    head = {
        'inspire_categories': [
            {'source': 'curator', 'term': 'Theory-HEP'},
            {'source': 'curator', 'term': 'Theory-Nucl'},
        ],
    }
    update = {
        'inspire_categories': [
            {'source': 'arxiv', 'term': 'Computing'},
            {'source': 'arxiv', 'term': 'Other'},
        ],
    }

    expected_conflict = []
    expected_merged = head  # same object as ``head``

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#10
0
def test_merging_dois_field_handles_repeated_values():
    """Check how ``dois`` with repeated values but different metadata merge.

    Head and update list the same two DOI values with different ``material``
    and ``source``. The test expects the head's entries followed by the
    update's entries, and no conflicts.
    """
    root = {
        'dois': [
            {'material': 'preprint', 'value': '10.1023/A:1026654312961'},
        ],
    }
    head = {
        'dois': [
            {'material': 'publication', 'value': '10.1023/A:1026654312961'},
            {'source': 'nowhere', 'value': '10.1023/B:1026654312961'},
        ],
    }
    update = {
        'dois': [
            {'material': 'erratum', 'value': '10.1023/A:1026654312961'},
            {
                'material': 'erratum',
                'source': 'nowhere',
                'value': '10.1023/B:1026654312961',
            },
        ],
    }

    expected_merged = {
        'dois': [
            {'material': 'publication', 'value': '10.1023/A:1026654312961'},
            {'source': 'nowhere', 'value': '10.1023/B:1026654312961'},
            {'material': 'erratum', 'value': '10.1023/A:1026654312961'},
            {
                'material': 'erratum',
                'source': 'nowhere',
                'value': '10.1023/B:1026654312961',
            },
        ],
    }
    expected_conflict = []

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_comparing_publication_info():
    """Check that an update adding ``artid`` to a matching entry wins.

    Head and update share ``journal_title`` and ``journal_volume``; only the
    update has an ``artid``. The test expects the update as the merge result
    and no conflicts.
    """
    root = {}
    head = {
        'publication_info': [
            {'journal_title': 'J. Testing', 'journal_volume': '42'},
        ],
    }
    update = {
        'publication_info': [
            {
                'journal_title': 'J. Testing',
                'journal_volume': '42',
                'artid': 'foo',
            },
        ],
    }

    expected_merged = update  # same object as ``update``
    expected_conflict = []

    # All inputs and the expectation go through ``add_arxiv_source`` together.
    root, head, update, expected_merged = add_arxiv_source(
        root, head, update, expected_merged
    )
    merged, conflict = merge(root, head, update, head_source='arxiv')
    merged = add_arxiv_source(merged)

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merging_titles_field():
    """Check that a curated title is kept and the new title is a conflict.

    The head adds a ``subtitle`` to the root's title while the update brings
    a different title. The test expects the head's title in the merge result
    and the update's title reported as an ``INSERT`` conflict at
    ``/titles/0``.
    """
    # Strings are immutable, so sharing this value between records is safe.
    antares_title = (
        'ANTARES: An observatory at the seabed '
        'to the confines of the Universe'
    )

    root = {
        'titles': [
            {'source': 'arXiv', 'title': antares_title},  # record: 1519935
        ],
    }
    head = {
        'titles': [
            {
                'source': 'arXiv',
                'subtitle': 'this subtitle has been added by a curator',
                'title': antares_title,
            },
        ],
    }
    update = {
        'titles': [
            {'source': 'arXiv', 'title': 'ANTARES: Un osservatorio foo bar'},
        ],
    }

    expected_merged = {
        'titles': [
            {
                'source': 'arXiv',
                'subtitle': 'this subtitle has been added by a curator',
                'title': antares_title,
            },
        ],
    }
    expected_conflict = [
        {
            'path': '/titles/0',
            'op': 'add',
            'value': {
                'source': 'arXiv',
                'title': 'ANTARES: Un osservatorio foo bar',
            },
            '$type': 'INSERT',
        },
    ]

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#13
0
def test_grobid_on_arxiv_operations_keeps_authors_from_head():
    """Check that GROBID authors missing from the arXiv head are not added.

    The arXiv record (head) has one author; the GROBID record (update) has
    that author with a longer raw affiliation plus a second author. The test
    expects only the head's author, carrying the GROBID affiliation, and no
    conflicts.
    """
    # Strings are immutable, so sharing this value between records is safe.
    full_affiliation = "Warsaw U., Faculty of Physics, Pastuera 7, Warsaw, Poland"

    root = {}
    authors_arxiv = {
        "authors": [
            {
                "full_name": "Kowal, Michal",
                "raw_affiliations": [{"value": "Warsaw U."}],
                "emails": ["*****@*****.**"],
            },
        ],
    }
    authors_grobid = {
        "authors": [
            {
                "full_name": "Kowal, Michal",
                "raw_affiliations": [{"value": full_affiliation}],
                "emails": ["*****@*****.**"],
            },
            {
                "full_name": "Sułkowski, Piotr Andrzej",
                "raw_affiliations": [{"value": full_affiliation}],
                "emails": ["*****@*****.**"],
            },
        ],
    }

    expected_merged = {
        "authors": [
            {
                "raw_affiliations": [{"value": full_affiliation}],
                "emails": ["*****@*****.**"],
                "full_name": "Kowal, Michal",
            },
        ],
    }

    merged, conflicts = merge(
        root,
        authors_arxiv,
        authors_grobid,
        configuration=GrobidOnArxivAuthorsOperations,
    )

    assert not conflicts
    assert merged == expected_merged
示例#14
0
def test_merger_handles_authors_with_correct_ordering():
    """Check author ordering and conflicts when an update entry is ambiguous.

    The update contains a new author, an entry whose ``full_name`` combines
    two head authors, and one author identical to the head's. The test
    expects the new author first, followed by the head's three authors, and
    two ``SET_FIELD`` conflicts carrying the combined entry.
    """
    root = {}
    head = {
        "authors": [
            {
                'full_name': 'Janeway, Kathryn',
                'age': 44
            },
            {
                'full_name': 'Picard, Jean-Luc',
                'age': 55
            },
            {
                "full_name": "Archer, Jonathan",
            }
        ],
    }
    update = {
        "authors": [
            {
                "full_name": "Kirk, James"
            },
            {
                'full_name': 'Janeway Kathryn, Picard Jean-Luc', 'age': 66
            },
            {
                "full_name": "Archer, Jonathan",
            }
        ],
    }

    expected_conflict = [
        {
            'path': '/authors/1',
            'op': 'replace',
            'value': {'full_name': 'Janeway Kathryn, Picard Jean-Luc', 'age': 66},
            '$type': 'SET_FIELD'
        },
        {
            'path': '/authors/2',
            'op': 'replace',
            'value': {'full_name': 'Janeway Kathryn, Picard Jean-Luc', 'age': 66},
            '$type': 'SET_FIELD'
        },
    ]
    expected_merged = {'authors': [
        {"full_name": "Kirk, James"},
        {'age': 44, 'full_name': 'Janeway, Kathryn'},
        {'age': 55, 'full_name': 'Picard, Jean-Luc'},
        {"full_name": "Archer, Jonathan"}
    ]}

    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged

    # ``list.sort()`` sorts in place and returns ``None``, so comparing the
    # results of two ``.sort()`` calls is always ``None == None``. Compare
    # sorted copies so the conflicts are really checked.
    by_path = itemgetter('path')
    assert sorted(conflict, key=by_path) == sorted(expected_conflict, key=by_path)
def test_comparing_references_field_different_dois():
    """Check that references with different DOIs are kept as separate entries.

    Head and update each have one reference with a different DOI. The test
    expects both references in the merge result, head's first, and no
    conflicts.
    """
    root = {}
    head = {
        'references': [
            {'reference': {'dois': ['10.1099/bar']}},
        ],
    }
    update = {
        'references': [
            {
                'reference': {
                    'dois': ['10.1099/foo'],
                    'document_type': 'article',
                },
            },
        ],
    }

    expected_merged = {
        'references': [
            {'reference': {'dois': ['10.1099/bar']}},
            {
                'reference': {
                    'dois': ['10.1099/foo'],
                    'document_type': 'article',
                },
            },
        ],
    }
    expected_conflict = []

    # All inputs and the expectation go through ``add_arxiv_source`` together.
    root, head, update, expected_merged = add_arxiv_source(
        root, head, update, expected_merged
    )
    merged, conflict = merge(root, head, update, head_source='arxiv')
    merged = add_arxiv_source(merged)

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#16
0
def test_merger_handles_list_deletions():
    """Check the conflicts raised when the head deleted a list the update keeps.

    The root has three ``book_series`` entries, the head has none and the
    update keeps only the third. The test expects the (empty) head as the
    merge result and four conflicts, compared after sorting by ``path``.
    """
    root = {
        'book_series': [
            {'title': 'IEEE Nucl.Sci.Symp.Conf.Rec.', 'volume': '1'},
            {'title': 'CMS Web-Based Monitoring', 'volume': '2'},
            {'title': 'Lectures in Testing', 'volume': '3'},
        ],
    }
    head = {}
    update = {
        'book_series': [
            {'title': 'Lectures in Testing', 'volume': '3'},
        ],
    }

    expected_merged = head  # same object as ``head``
    expected_conflict = [
        {
            'path': '/book_series/0/volume',
            'op': 'replace',
            'value': '3',
            '$type': 'SET_FIELD',
        },
        {
            'path': '/book_series/0/title',
            'op': 'replace',
            'value': 'Lectures in Testing',
            '$type': 'SET_FIELD',
        },
        {
            'path': '/book_series/2',
            'op': 'remove',
            'value': None,
            '$type': 'REMOVE_FIELD',
        },
        {
            'path': '/book_series/1',
            'op': 'remove',
            'value': None,
            '$type': 'REMOVE_FIELD',
        },
    ]

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged

    # Sort both sides by path before comparing.
    by_path = operator.itemgetter("path")
    actual_sorted = sorted(conflict, key=by_path)
    expected_sorted = sorted(expected_conflict, key=by_path)
    assert actual_sorted == expected_sorted
def test_figures():
    """Check that the update's ``figures`` replace the head's.

    Head and update each list two arXiv figures with different keys and
    URLs; the test expects the update as the merge result and no conflicts.
    """
    root = {}
    head = {
        'figures': [
            {
                'key': 'figure1.png',
                'caption': 'Figure 1',
                'source': 'arXiv',
                'url': 'http://example.comfiles/1234-1234-1234-1234/figure1.png',
            },
            {
                'key': 'figure2.png',
                'caption': 'Figure 2',
                'source': 'arXiv',
                'url': 'http://example.com/files/1234-1234-1234-1234/figure2.png',
            },
        ],
    }
    update = {
        'figures': [
            {
                'key': 'new_figure1.png',
                'caption': 'Figure 1',
                'source': 'arXiv',
                'url': 'http://example.com/files/5678-5678-5678-5678/figure1.png',
            },
            {
                'key': 'new_figure2.png',
                'caption': 'Figure 2',
                'source': 'arXiv',
                'url': 'http://example.com/files/5678-5678-5678-5678/figure2.png',
            },
        ],
    }

    expected_conflict = []
    expected_merged = update  # same object as ``update``

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#18
0
def test_ordering_conflicts():
    """Check merged authors and conflicts for fixture records.

    This test is actually for broken input, where authors are duplicated.
    Inputs and expectations are loaded from ``test_data/*.json``; authors
    are compared after sorting by ``uuid``.
    """
    root = load_test_data("test_data/root.json")
    head = load_test_data("test_data/head.json")
    update = load_test_data("test_data/update.json")

    expected_conflicts = load_test_data("test_data/conflicts.json")
    expected_merged = load_test_data("test_data/merged.json")

    merged, conflicts = merge(root, head, update)

    by_uuid = itemgetter('uuid')
    merged_authors = sorted(merged['authors'], key=by_uuid)
    expected_authors = sorted(expected_merged['authors'], key=by_uuid)
    assert merged_authors == expected_authors
    assert_ordered_conflicts(conflicts, expected_conflicts)
示例#19
0
def merge_articles(obj, eng):
    """Merge the workflow payload into the matched literature record.

    The matched record (``obj.extra_data['matches']['approved']``) is the
    head, ``obj.data`` is the update, and the root is the stored workflow
    record source for the update's source (``{}`` when none is found).

    ``obj.data`` is overwritten with the merged record. ``head_uuid``,
    ``head_version_id``, ``merger_head_revision`` and a deep copy of the
    root (``merger_original_root``) are recorded in ``obj.extra_data``.
    When the merge reports conflicts, ``conflicts``, ``conflicts_metadata``
    and ``callback_url`` are stored there as well.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is falsy the
        merge is skipped and nothing is modified or saved.

    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    matched_control_number = obj.extra_data['matches']['approved']
    head_pid = PersistentIdentifier.get('lit', matched_control_number)
    head_uuid = head_pid.object_uuid

    head_record = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = LiteratureReader(obj.data).source

    stored_root = read_wf_record_source(
        record_uuid=head_record.id,
        source=update_source.lower(),
    )
    head_root = stored_root.json if stored_root else {}

    obj.extra_data['head_uuid'] = str(head_uuid)
    obj.extra_data['head_version_id'] = head_record.model.version_id
    obj.extra_data['merger_head_revision'] = head_record.revision_id
    # Store a copy of the root as it was before merging.
    obj.extra_data['merger_original_root'] = deepcopy(head_root)

    merged, conflicts = merge(
        head=head_record.to_dict(),
        root=head_root,
        update=update,
    )
    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['conflicts_metadata'] = {
            'datetime': datetime.now().strftime("%b %d, %Y, %H:%M:%S %p"),
            'update_source': update_source,
        }
        callback_url = get_resolve_merge_conflicts_callback_url()
        obj.extra_data['callback_url'] = callback_url

    obj.save()
def test_documents():
    """Check that the update's ``documents`` replace the head's.

    Head and update each list two arXiv documents with different keys and
    URLs; the test expects the update as the merge result and no conflicts.
    """
    root = {}
    head = {
        'documents': [
            {
                'key': 'pdf1.pdf',
                'description': 'paper',
                'source': 'arXiv',
                'fulltext': True,
                'url': 'http://example.com/files/1234-1234-1234-1234/pdf1.pdf',
            },
            {
                'key': 'pdf.tex',
                'description': 'latex version',
                'source': 'arXiv',
                'url': 'http://example.com/files/1234-1234-1234-1234/pdf.tex',
            },
        ],
    }
    update = {
        'documents': [
            {
                'key': 'pdf.pdf',
                'description': 'paper',
                'source': 'arXiv',
                'url': 'http://example.com/files/5678-5678-5678-5678/pdf.pdf',
            },
            {
                'key': 'foo.xml',
                'description': 'some xml files',
                'source': 'arXiv',
                'url': 'http://example.com/files/5678-5678-5678-5678/foo.xml',
            },
        ],
    }

    expected_conflict = []
    expected_merged = update  # same object as ``update``

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_head_curates_author_no_duplicate():
    """Check the merge when the head re-spelled an author the update keeps.

    Root and update carry ``"Li, Zhengxiang"``; the head has the hyphenated
    spelling plus an affiliation. The test expects both entries in the merge
    result and one ``REMOVE_FIELD`` conflict at ``/authors/1``.
    """
    # https://labs.inspirehep.net/api/holdingpen/1268973
    root = {'authors': [{"full_name": "Li, Zhengxiang"}]}
    head = {
        "authors": [
            {
                "affiliations": [{"value": "Beijing Normal U."}],
                "full_name": "Li, Zheng-Xiang",
            },
        ],
    }
    update = {'authors': [{"full_name": "Li, Zhengxiang"}]}

    expected_merged = {
        'authors': [
            {'full_name': 'Li, Zhengxiang'},
            {
                'full_name': 'Li, Zheng-Xiang',
                'affiliations': [{'value': 'Beijing Normal U.'}],
            },
        ],
    }
    expected_conflict = [
        {
            'path': '/authors/1',
            'op': 'remove',
            'value': None,
            '$type': 'REMOVE_FIELD',
        },
    ]

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert conflict == expected_conflict
    validate_subschema(merged)
示例#22
0
def test_grobid_on_arxiv_operations_without_conflict():
    """Check that the GROBID raw affiliation replaces the arXiv one.

    Both records hold the same single author; the GROBID record has a longer
    raw affiliation. The test expects the author with the GROBID affiliation
    and no conflicts.
    """
    # Strings are immutable, so sharing this value between records is safe.
    full_affiliation = "Warsaw U., Faculty of Physics, Pastuera 7, Warsaw, Poland"

    root = {}
    authors_arxiv = {
        "authors": [
            {
                "full_name": "Sułkowski, Piotr",
                "raw_affiliations": [{"value": "Warsaw U."}],
                "emails": ["*****@*****.**"],
            },
        ],
    }
    authors_grobid = {
        "authors": [
            {
                "full_name": "Sułkowski, Piotr",
                "raw_affiliations": [{"value": full_affiliation}],
                "emails": ["*****@*****.**"],
            },
        ],
    }

    expected_merged = {
        "authors": [
            {
                "raw_affiliations": [{"value": full_affiliation}],
                "emails": ["*****@*****.**"],
                "full_name": "Sułkowski, Piotr",
            },
        ],
    }

    merged, conflicts = merge(
        root,
        authors_arxiv,
        authors_grobid,
        configuration=GrobidOnArxivAuthorsOperations,
    )

    assert not conflicts
    assert merged == expected_merged
示例#23
0
def merge_article_with_crossref_data(article):
    """Enhance ``article`` with the data imported for its first DOI.

    The original ``article`` is returned untouched when it has no DOI or
    when the import raises ``ImportConnectionError`` or
    ``ImportParsingError``. Otherwise the merged record is returned; merge
    conflicts are only logged.

    Args:
        article: the record to enhance.

    Returns:
        The merged record, or ``article`` itself on the fallbacks above.
    """
    doi = get_value(article, "dois[0].value")
    if not doi:
        # Nothing to look up.
        return article

    try:
        crossref_data = import_doi(doi)
    except (ImportConnectionError, ImportParsingError):
        LOGGER.exception("Cannot merge submission with %r,", doi)
        return article

    # The article is the head; the imported data is applied as the update.
    merged, conflicts = merge(root={}, head=article, update=crossref_data)

    if conflicts:
        LOGGER.debug(
            "Ignoring conflicts while enhancing submission.\n%r", conflicts
        )
    return merged
def test_merging_acquisition_source_field():
    """Check that the update's ``acquisition_source`` replaces the head's.

    Head and update share the source but differ in ``method``; the test
    expects the update as the merge result and no conflicts.
    """
    root = {}
    # record_id: 1517095
    head = {
        'acquisition_source': {'method': 'submitter', 'source': 'arxiv'},
    }
    update = {
        'acquisition_source': {'method': 'batchuploader', 'source': 'arxiv'},
    }

    expected_conflict = []
    expected_merged = update  # same object as ``update``

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merging_license_field():
    """Check that an updated license is merged and a head-only one is kept.

    The head adds an arXiv license to the root's Elsevier one; the update
    changes the Elsevier license text. The test expects the updated Elsevier
    license followed by the head's arXiv license, and no conflicts.
    """
    # Strings are immutable, so sharing this value between records is safe.
    cc_by_url = 'http://creativecommons.org/licenses/by/4.0/'

    root = {
        'license': [
            {
                'imposing': 'Elsevier',
                'url': cc_by_url,
                'license': 'elsevier foo bar',
            },
        ],
    }
    head = {
        'license': [
            {
                'imposing': 'Elsevier',
                'url': cc_by_url,
                'license': 'elsevier foo bar',
            },
            {
                'imposing': 'arXiv',
                'url': cc_by_url,
                'license': 'arxiv foo bar',
            },
        ],
    }
    update = {
        'license': [
            {
                'imposing': 'Elsevier',
                'url': cc_by_url,
                'license': 'elsevier foo bar updated!',
            },
        ],
    }

    expected_merged = {
        'license': [
            {
                'imposing': 'Elsevier',
                'url': cc_by_url,
                'license': 'elsevier foo bar updated!',
            },
            {
                'imposing': 'arXiv',
                'url': cc_by_url,
                'license': 'arxiv foo bar',
            },
        ],
    }
    expected_conflict = []

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#26
0
def test_merging_publication_info_for_arxiv_on_publisher(fake_get_config):
    """Check that the head's ``journal_title`` is kept in ``publication_info``.

    Root, head and update hold one entry each, identical except for
    ``journal_title``. The test expects a single merged entry carrying the
    head's title.

    ``fake_get_config`` is not referenced in the body; presumably a pytest
    fixture that patches configuration -- confirm against the test module.
    """
    def record_with_title(journal_title):
        # Build a fresh record on every call so the three inputs share no
        # mutable objects.
        return {
            "publication_info": [{
                "year": 2021,
                "artid": "051701",
                "material": "publication",
                "journal_issue": "5",
                "journal_title": journal_title,
                "journal_record": {
                    "$ref": "https://inspirehep.net/api/journals/1613970"
                },
                "journal_volume": "104",
            }]
        }

    root = record_with_title("root title")
    head = record_with_title("head title")
    update = record_with_title("update title")

    # The conflicts are returned but not checked by this test.
    merged, conflict = merge(root, head, update)

    merged_entries = merged['publication_info']
    assert len(merged_entries) == 1
    assert merged['publication_info'][0]['journal_title'] == 'head title'
示例#27
0
def test_comparing_keywords():
    """Check how ``keywords`` with overlapping values and schemas merge.

    Head and update share the JACOW ``shielding`` keyword; each has one
    other keyword. The test expects the update's keywords followed by the
    head-only one, and no conflicts.
    """
    root = {}
    head = {
        'keywords': [
            {'value': 'shielding', 'schema': 'JACOW'},
            {'value': 'test', 'schema': 'JACOW'},
        ],
    }
    update = {
        'keywords': [
            {'value': 'shielding', 'schema': 'INSPIRE'},
            {'value': 'shielding', 'schema': 'JACOW'},
        ],
    }

    expected_merged = {
        'keywords': [
            {'value': 'shielding', 'schema': 'INSPIRE'},
            {'value': 'shielding', 'schema': 'JACOW'},
            {'value': 'test', 'schema': 'JACOW'},
        ],
    }
    expected_conflict = []

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#28
0
def merge_articles(obj, eng):
    """Merge the workflow payload into the matched literature record.

    The matched record (``obj.extra_data['matches']['approved']``) is the
    head, ``obj.data`` is the update, and the root is the stored workflow
    record source for the update's lower-cased source (``{}`` when none is
    found).

    ``obj.data`` is overwritten with the merged record and ``head_uuid`` is
    recorded in ``obj.extra_data``. When the merge reports conflicts,
    ``conflicts`` and ``callback_url`` are stored there as well.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is falsy the
        merge is skipped and nothing is modified or saved.

    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    matched_control_number = obj.extra_data['matches']['approved']
    head_pid = PersistentIdentifier.get('lit', matched_control_number)
    head_uuid = head_pid.object_uuid

    obj.extra_data['head_uuid'] = str(head_uuid)

    head = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = get_source(update).lower()

    stored_root = read_wf_record_source(
        record_uuid=head.id,
        source=update_source,
    )
    head_root = stored_root.json if stored_root else {}

    merged, conflicts = merge(
        head=head.dumps(),
        root=head_root,
        update=update,
    )
    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        callback_url = get_resolve_merge_conflicts_callback_url()
        obj.extra_data['callback_url'] = callback_url

    obj.save()
def test_merging_raw_affiliations_field():
    """Check that the update's ``raw_affiliations`` replace the head's.

    Head and update hold the same author; the update has a slightly
    different affiliation string plus a second affiliation. The test expects
    the update as the merge result and no conflicts.
    """
    root = {}
    head = {
        'authors': [
            {
                'full_name': 'Pitts, Kevin T',
                'raw_affiliations': [
                    {
                        'source': 'arxiv',
                        'value': 'Department of Physics, Indiana University, Bloomington, IN 47405, USA',
                    },
                ],
            },
        ],
    }
    update = {
        'authors': [
            {
                'full_name': 'Pitts, Kevin T',
                'raw_affiliations': [
                    {
                        'source': 'arxiv',
                        'value': 'Department of Physics, Indiana University, Bloomington, IN 47405, US',
                    },
                    {'source': 'arxiv', 'value': 'Padua U'},
                ],
            },
        ],
    }

    expected_conflict = []
    expected_merged = update  # same object as ``update``

    merged, conflict = merge(root, head, update, head_source='arxiv')

    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
示例#30
0
def merge_articles(obj, eng):
    """Merge the workflow payload into the matched literature record.

    The matched record (``obj.extra_data['matches']['approved']``) is the
    head and ``obj.data`` is the update. ``obj.data`` is overwritten with
    the merged record and ``head_uuid`` is recorded in ``obj.extra_data``.
    When the merge reports conflicts, ``conflicts`` and ``callback_url`` are
    stored there as well.

    Note:
        This is a rootless implementation: the ``root`` passed to the
        merger is always the empty dictionary. When the feature flag
        ``FEATURE_FLAG_ENABLE_MERGER`` is falsy the merge is skipped and
        nothing is modified or saved.

    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    matched_control_number = obj.extra_data['matches']['approved']
    head_pid = PersistentIdentifier.get('lit', matched_control_number)
    head_uuid = head_pid.object_uuid

    obj.extra_data['head_uuid'] = str(head_uuid)

    head = InspireRecord.get_record(head_uuid)
    update = obj.data

    # No common ancestor is looked up; merge against an empty root.
    merged, conflicts = merge(head=head.dumps(), root={}, update=update)
    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        callback_url = get_resolve_merge_conflicts_callback_url()
        obj.extra_data['callback_url'] = callback_url

    obj.save()