Пример #1
0
    def _save_identifiers_to_kb(obj, eng):
        """Pass the record's identifiers on to ``save_keys_to_kb``.

        The values stored under ``identifier_key`` in the record obtained
        from ``obj`` (an empty list when that key is absent) are handed to
        ``save_keys_to_kb`` together with ``kb_name`` and ``obj.id``.

        NOTE(review): ``identifier_key`` and ``kb_name`` are not defined in
        this function -- presumably they come from an enclosing scope.

        :param obj: workflow object; its ``id`` is forwarded to the KB call.
        :param eng: workflow engine, only forwarded to
            ``get_record_from_obj``.
        """
        # Imported at call time rather than at module load.
        from inspirehep.utils.knowledge import save_keys_to_kb

        keys_to_store = get_record_from_obj(obj, eng).get(identifier_key, [])
        save_keys_to_kb(kb_name, keys_to_store, obj.id)
Пример #2
0
def exists_in_holding_pen(obj, eng):
    """Check if a matching record already exists in the Holding Pen.

    For every ``field -> lookup`` pair in the ``HOLDING_PEN_MATCH_MAPPING``
    application setting, each value found in the record under ``lookup``
    becomes an exact-match term ``field:"value"``. All terms are OR-ed into
    one Holding Pen search (first page, 10 results) and ``obj.id`` itself is
    removed from the hits.

    When a search is performed, the remaining ids are stored as a list in
    ``obj.extra_data["holdingpen_ids"]`` (even if empty).

    :param obj: workflow object being checked.
    :param eng: workflow engine, only forwarded to ``get_record_from_obj``.
    :return: the set of other matching ids (possibly empty) when at least
        one identifier was available to search on, otherwise ``False``.
    """
    from invenio_workflows.search import search as hp_search

    record = get_record_from_obj(obj, eng)

    # A missing (or None) mapping means there is nothing to match on;
    # without this fallback six.iteritems() would raise on None.
    match_mapping = current_app.config.get("HOLDING_PEN_MATCH_MAPPING") or {}

    identifiers = []
    for field, lookup in six.iteritems(match_mapping):
        # Add quotes around to make the search exact
        identifiers.extend(
            '{0}:"{1}"'.format(field, value)
            for value in record.get(lookup, [])
        )

    if not identifiers:
        return False

    # Search for any existing record in Holding Pen, exclude self.
    # NOTE(review): element [0] of the search result is presumably the
    # collection of matching object ids -- confirm against hp_search.
    matches = hp_search(
        query=" OR ".join(identifiers),
        per_page=10,
        page=1,
    )[0]
    result = set(matches) - set([obj.id])
    if result:
        obj.log.info("Record already found in Holding Pen ({0})".format(
            result
        ))
    obj.extra_data["holdingpen_ids"] = list(result)
    return result
Пример #3
0
    def _create_curation_ticket(obj, eng):
        """Submit a curation ticket when the object is flagged as core.

        The result of ``acc_get_user_email(obj.id_user)`` and the record are
        always fetched. A subject/body pair is built with
        ``get_curation_body`` and sent through ``submit_rt_ticket`` only when
        ``obj.extra_data`` holds a truthy ``"core"`` entry; otherwise nothing
        else happens.

        NOTE(review): ``template``, ``queue`` and ``ticket_id_key`` are not
        defined in this function -- presumably they come from an enclosing
        scope.

        :param obj: workflow object the ticket is created for.
        :param eng: workflow engine, only forwarded to
            ``get_record_from_obj``.
        """
        from invenio_access.control import acc_get_user_email

        user_email = acc_get_user_email(obj.id_user)
        metadata = get_record_from_obj(obj, eng)

        # Nothing to submit unless the record was marked as core.
        if not obj.extra_data.get("core"):
            return

        ticket_subject, ticket_body = get_curation_body(
            template, metadata, user_email, obj.extra_data
        )
        submit_rt_ticket(
            obj, queue, ticket_subject, ticket_body, user_email, ticket_id_key
        )
Пример #4
0
    def test_harvesting_workflow_accepted(self, search):
        """Test a full harvesting workflow where the record is accepted.

        Starts the ``harvesting_fixture`` workflow with mocked HTTP
        endpoints, takes the first halted object, marks it as approved and
        core, continues the workflow and checks that the resulting record
        is in the ``CORE`` primary collection.

        :param search: mock used for the Holding Pen search; its return
            value is set here. NOTE(review): presumably injected by a patch
            decorator that is outside this view.
        """
        from invenio_base.globals import cfg
        from invenio_workflows.api import start
        from inspirehep.utils.helpers import (
            get_record_from_obj,
        )

        # Mock Elasticsearch search for Holding Pen check
        search.return_value = ([], 0)

        # Mock matching checks: the record search reports no hits
        responses.add(
            responses.GET,
            re.compile(".*record/_search"),
            status=200,
            body="""{
               "hits": {
                  "total": 0,
                  "max_score": null,
                  "hits": []
               }
            }""",
            content_type='application/json'
        )

        # Mock the arXiv source tarball download
        responses.add(
            responses.GET,
            'http://arxiv.org/e-print/1511.01097',
            content_type="application/x-eprint-tar",
            body=self.arxiv_tarball_accept.read(),
            status=200,
            adding_headers={
                "Content-Encoding": 'x-gzip',
            },
        )

        # Mock the arXiv PDF download
        responses.add(
            responses.GET,
            'http://arxiv.org/pdf/1511.01097',
            content_type="application/pdf",
            body=self.arxiv_pdf_accept.read(),
            status=200,
            stream=True,
        )

        # Mock the robotupload endpoint hit after acceptance
        robotupload_url = os.path.join(
            cfg.get("CFG_ROBOTUPLOAD_SUBMISSION_BASEURL"),
            "batchuploader/robotupload/insert"
        )

        responses.add(
            responses.POST,
            robotupload_url,
            body="[INFO] bibupload batchupload --insert /dummy/file/path\n",
            status=200,
        )
        workflow = start('harvesting_fixture',
                         data=[self.record_oai_arxiv_accept],
                         module_name='unit_tests')

        # Let's get the halted record
        obj = workflow.halted_objects[0]

        # Now let's resolve it as accepted and continue
        obj.remove_action()
        obj.extra_data["approved"] = True
        obj.extra_data["core"] = True
        # Write the modified extra_data back explicitly before saving
        obj.set_extra_data(obj.extra_data)
        obj.save()
        workflow = obj.continue_workflow()

        record = get_record_from_obj(obj, workflow)
        # Now it is CORE (assertIn reports the collections on failure)
        self.assertIn("CORE", record.get("collections.primary"))
Пример #5
0
    def test_harvesting_workflow_rejected(self, search):
        """Test a full harvesting workflow where the record is rejected.

        Starts the ``harvesting_fixture`` workflow with mocked HTTP
        endpoints and inspects the first completed object: it must not be
        approved, must carry two files, FFTs, publication info and an
        ``arxiv_guessing`` task result, and must not be in the ``CORE``
        primary collection.

        :param search: mock used for the Holding Pen search; its return
            value is set here. NOTE(review): presumably injected by a patch
            decorator that is outside this view.
        """
        from invenio_workflows.api import start
        from inspirehep.utils.helpers import (
            get_record_from_obj,
        )

        # Mock Elasticsearch search for Holding Pen check
        search.return_value = ([], 0)

        # Mock matching checks: the record search reports no hits
        responses.add(
            responses.GET,
            re.compile(".*record/_search"),
            status=200,
            body="""{
               "hits": {
                  "total": 0,
                  "max_score": null,
                  "hits": []
               }
            }""",
            content_type='application/json'
        )

        # Mock the arXiv source tarball download
        responses.add(
            responses.GET,
            'http://arxiv.org/e-print/1407.7587',
            content_type="application/x-eprint-tar",
            body=self.arxiv_tarball.read(),
            status=200,
            adding_headers={
                "Content-Encoding": 'x-gzip',
            },
        )

        # Mock the arXiv PDF download
        responses.add(
            responses.GET,
            'http://arxiv.org/pdf/1407.7587',
            content_type="application/pdf",
            body=self.arxiv_pdf.read(),
            status=200,
            stream=True,
        )

        workflow = start('harvesting_fixture',
                         data=[self.record_oai_arxiv_plots],
                         module_name='unit_tests')

        # Let's get the record metadata and check contents
        obj = workflow.completed_objects[0]
        record = get_record_from_obj(obj, workflow)

        # This record should be rejected
        self.assertFalse(obj.extra_data["approved"])

        # Files should have been attached (tarball + pdf)
        self.assertEqual(len(obj.data["files"]), 2)

        # Some plots/files should have been added to FFTs
        self.assertTrue(record.get('fft'))

        # A publication note should have been extracted
        self.assertTrue(record.get('publication_info'))

        # A prediction should have been made
        self.assertTrue(obj.get_tasks_results().get("arxiv_guessing"))

        # It is not CORE
        self.assertNotIn("CORE", record.get("collections.primary"))