示例#1
0
    def test_14_oag_record_callback_duplicate(self):
        """An OAG result is applied to every record sharing the identifier.

        Two records with the same PMCID are saved under one job; after the
        callback both of them must carry the licence, AAM and provenance data.
        """
        # first make ourselves a job that the records will belong to
        job = models.SpreadsheetJob()
        job.save()

        # make two distinct records with the same ids
        for _ in range(2):
            record = models.Record()
            record.upload_id = job.id
            record.pmcid = "PMC1234"
            record.save()

        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which has detected a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "cc-by",
                "provenance": {
                    "accepted_author_manuscript": True,
                    "description": "Provenance PMC1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # give the index a moment to catch up
        time.sleep(2)

        # read the duplicate records out of the index
        records = list(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))

        # there should be 2 of them
        assert len(records) == 2
        for record in records:
            # check the loop variable itself; the original checked a name
            # leaked from the list comprehension, i.e. only the last record
            assert isinstance(record, models.Record)

            # both records should have the same data
            # licence added, source=epmc, pmcid=success, provenance added, aam set
            assert record.licence_type == "cc-by"
            assert record.licence_source == "epmc"
            assert record.oag_pmcid == "success"
            assert record.aam_from_epmc is True
            assert record.aam is True
            provs = [note for _, _, note in record.provenance]
            assert len(provs) == 2
            assert "PMC1234 - Provenance PMC1234" in provs
            assert "Detected AAM status from EPMC web page" in provs
            assert record.oag_complete is True
示例#2
0
    def test_03_handle_oag_response_01_pmcid_success(self):
        """A successful PMCID result sets licence, AAM and provenance data.

        Nothing should be queued for re-run when a licence was detected.
        """
        # first make ourselves a job/record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.save()

        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which has detected a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "cc-by",
                "provenance": {
                    "accepted_author_manuscript": True,  # FIXME: provisional
                    "description": "Provenance PMC1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should not have added anything to the rerun
        assert len(oag_rerun) == 0

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
        assert isinstance(r2, models.Record)
        assert r2.id == record.id
        assert r2.pmcid == "PMC1234"

        # licence added, source=epmc, pmcid=success, provenance added, aam set
        assert r2.licence_type == "cc-by"
        assert r2.licence_source == "epmc"
        assert r2.oag_pmcid == "success"
        assert r2.aam_from_epmc is True
        assert r2.aam is True
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 2
        assert "PMC1234 - Provenance PMC1234" in provs
        assert "Detected AAM status from EPMC web page" in provs
示例#3
0
    def test_03_handle_oag_response_01_pmcid_success(self):
        """A successful PMCID result sets licence, AAM and provenance data.

        Nothing should be queued for re-run when a licence was detected.
        """
        # first make ourselves a job/record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.save()

        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which has detected a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "cc-by",
                "provenance": {
                    "accepted_author_manuscript": True,  # FIXME: provisional
                    "description": "Provenance PMC1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should not have added anything to the rerun
        assert len(oag_rerun) == 0

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
        assert isinstance(r2, models.Record)
        assert r2.id == record.id
        assert r2.pmcid == "PMC1234"

        # licence added, source=epmc, pmcid=success, provenance added, aam set
        assert r2.licence_type == "cc-by"
        assert r2.licence_source == "epmc"
        assert r2.oag_pmcid == "success"
        assert r2.aam_from_epmc is True
        assert r2.aam is True
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 2
        assert "PMC1234 - Provenance PMC1234" in provs
        assert "Detected AAM status from EPMC web page" in provs
示例#4
0
    def test_03_handle_oag_response_02_pmcid_fto(self):
        """A failed-to-obtain PMCID result queues the record's DOI for re-run.

        The record gets no licence, but its PMCID status is set to "fto" and
        the AAM status and provenance are still recorded.
        """
        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.doi = "10.1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which failed to obtain a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "failed-to-obtain-license",
                "provenance": {
                    "accepted_author_manuscript": True,  # FIXME: provisional
                    "description": "FTO PMC1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should have added the DOI to the re-run
        assert len(oag_rerun) == 1
        assert oag_rerun[0]["id"] == "10.1234"
        assert oag_rerun[0]["type"] == "doi"

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
        assert isinstance(r2, models.Record)

        # provenance added, pmcid=fto, aam set
        assert r2.licence_type is None
        assert r2.oag_pmcid == "fto"
        assert r2.aam_from_epmc is True
        assert r2.aam is True
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 2
        assert "PMC1234 - FTO PMC1234" in provs
        assert "Detected AAM status from EPMC web page" in provs
示例#5
0
    def test_03_handle_oag_response_02_pmcid_fto(self):
        """A failed-to-obtain PMCID result queues the record's DOI for re-run.

        The record gets no licence, but its PMCID status is set to "fto" and
        the AAM status and provenance are still recorded.
        """
        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.doi = "10.1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which failed to obtain a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "failed-to-obtain-license",
                "provenance": {
                    "accepted_author_manuscript": True,  # FIXME: provisional
                    "description": "FTO PMC1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should have added the DOI to the re-run
        assert len(oag_rerun) == 1
        assert oag_rerun[0]["id"] == "10.1234"
        assert oag_rerun[0]["type"] == "doi"

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
        assert isinstance(r2, models.Record)

        # provenance added, pmcid=fto, aam set
        assert r2.licence_type is None
        assert r2.oag_pmcid == "fto"
        assert r2.aam_from_epmc is True
        assert r2.aam is True
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 2
        assert "PMC1234 - FTO PMC1234" in provs
        assert "Detected AAM status from EPMC web page" in provs
示例#6
0
    def test_03_handle_oag_response_04_pmcid_no_change(self):
        """A record that already has a licence and AAM status stays unchanged.

        The failed-to-obtain PMCID result must neither queue a re-run nor
        touch the record's licence, AAM or provenance fields.
        """
        # first make ourselves a record that already has licence and AAM data
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.licence_type = "CC BY"
        record.aam = True
        record.aam_from_xml = True
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which failed to obtain a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "failed-to-obtain-license",
                "provenance": {
                    "accepted_author_manuscript": False,  # FIXME: provisional
                    "description": "You won't see this PMC1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should not have added anything to the rerun
        assert len(oag_rerun) == 0

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
        assert isinstance(r2, models.Record)

        # expecting no changes
        assert r2.licence_type == "CC BY"
        assert r2.licence_source is None
        assert r2.oag_pmcid is None
        assert r2.aam_from_epmc is False
        assert r2.aam is True
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 0
示例#7
0
    def test_03_handle_oag_response_04_pmcid_no_change(self):
        """A record that already has a licence and AAM status stays unchanged.

        The failed-to-obtain PMCID result must neither queue a re-run nor
        touch the record's licence, AAM or provenance fields.
        """
        # first make ourselves a record that already has licence and AAM data
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.licence_type = "CC BY"
        record.aam = True
        record.aam_from_xml = True
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which failed to obtain a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "failed-to-obtain-license",
                "provenance": {
                    "accepted_author_manuscript": False,  # FIXME: provisional
                    "description": "You won't see this PMC1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should not have added anything to the rerun
        assert len(oag_rerun) == 0

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
        assert isinstance(r2, models.Record)

        # expecting no changes
        assert r2.licence_type == "CC BY"
        assert r2.licence_source is None
        assert r2.oag_pmcid is None
        assert r2.aam_from_epmc is False
        assert r2.aam is True
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 0
示例#8
0
    def test_03_handle_oag_response_07_doi_fto(self):
        """A failed-to-obtain DOI result queues the record's PMID for re-run.

        The record gets no licence; its DOI status is set to "fto" and the
        provenance message is recorded.
        """
        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.doi = "10.1234"
        record.pmid = "1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which failed to obtain a licence
        oag_result = {
            "identifier": [{
                "id": "10.1234",
                "type": "doi"
            }],
            "license": [{
                "type": "failed-to-obtain-license",
                "provenance": {
                    "description": "FTO 10.1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should have added the PMID to the re-run
        assert len(oag_rerun) == 1
        assert oag_rerun[0]["id"] == "1234"
        assert oag_rerun[0]["type"] == "pmid"

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("10.1234", job.id))
        assert isinstance(r2, models.Record)

        # provenance added, doi=fto, pmid reprocess
        assert r2.licence_type is None
        assert r2.oag_doi == "fto"
        assert r2.aam is None
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 1
        assert "10.1234 - FTO 10.1234" in provs
示例#9
0
    def test_03_handle_oag_response_07_doi_fto(self):
        """A failed-to-obtain DOI result queues the record's PMID for re-run.

        The record gets no licence; its DOI status is set to "fto" and the
        provenance message is recorded.
        """
        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.doi = "10.1234"
        record.pmid = "1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which failed to obtain a licence
        oag_result = {
            "identifier": [{
                "id": "10.1234",
                "type": "doi"
            }],
            "license": [{
                "type": "failed-to-obtain-license",
                "provenance": {
                    "description": "FTO 10.1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should have added the PMID to the re-run
        assert len(oag_rerun) == 1
        assert oag_rerun[0]["id"] == "1234"
        assert oag_rerun[0]["type"] == "pmid"

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("10.1234", job.id))
        assert isinstance(r2, models.Record)

        # provenance added, doi=fto, pmid reprocess
        assert r2.licence_type is None
        assert r2.oag_doi == "fto"
        assert r2.aam is None
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 1
        assert "10.1234 - FTO 10.1234" in provs
示例#10
0
    def test_03_handle_oag_response_06_doi_success(self):
        """A successful DOI result sets the licence with source "publisher".

        Nothing should be queued for re-run, and the AAM status stays unset.
        """
        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.doi = "10.1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which has detected a licence
        oag_result = {
            "identifier": [{
                "id": "10.1234",
                "type": "doi"
            }],
            "license": [{
                "type": "cc-by",
                "provenance": {
                    "description": "Provenance 10.1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should not have added anything to the rerun
        assert len(oag_rerun) == 0

        # give the index a moment to catch up
        time.sleep(2)

        # the "doi" type argument is deliberately left out of this lookup;
        # builtin next() replaces the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("10.1234", job.id))
        assert isinstance(r2, models.Record)

        # licence added, source=publisher, doi=success, provenance added
        assert r2.licence_type == "cc-by"
        assert r2.licence_source == "publisher"
        assert r2.oag_doi == "success"
        assert r2.aam is None
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 1
        assert "10.1234 - Provenance 10.1234" in provs
示例#11
0
    def test_03_handle_oag_response_09_pmid_success(self):
        """A successful PMID result sets the licence with source "publisher".

        Nothing should be queued for re-run, and the AAM status stays unset.
        """
        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmid = "1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which has detected a licence
        oag_result = {
            "identifier": [{
                "id": "1234",
                "type": "pmid"
            }],
            "license": [{
                "type": "cc-by",
                "provenance": {
                    "description": "Provenance 1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should not have added anything to the rerun
        assert len(oag_rerun) == 0

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("1234", job.id, "pmid"))
        assert isinstance(r2, models.Record)

        # licence added, source=publisher, pmid=success, provenance added
        assert r2.licence_type == "cc-by"
        assert r2.licence_source == "publisher"
        assert r2.oag_pmid == "success"
        assert r2.aam is None
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 1
        assert "1234 - Provenance 1234" in provs
示例#12
0
    def test_03_handle_oag_response_05_pmcid_error(self):
        """An error result for a PMCID queues the record's PMID for re-run.

        The record gets no licence; its PMCID status is set to "error" and
        the error message is recorded as provenance.
        """
        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.pmid = "1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which reports an error; note the
        # identifier here is a single object rather than a list
        oag_result = {
            "identifier": {
                "id": "PMC1234",
                "type": "pmcid"
            },
            "error": "broken!"
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should have added the PMID to the re-run
        assert len(oag_rerun) == 1
        assert oag_rerun[0]["id"] == "1234"
        assert oag_rerun[0]["type"] == "pmid"

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
        assert isinstance(r2, models.Record)

        # provenance added, pmcid=error, pmid reprocess
        assert r2.licence_type is None
        assert r2.oag_pmcid == "error"
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 1
        assert "PMC1234 - broken!" in provs
示例#13
0
    def test_03_handle_oag_response_05_pmcid_error(self):
        """An error result for a PMCID queues the record's PMID for re-run.

        The record gets no licence; its PMCID status is set to "error" and
        the error message is recorded as provenance.
        """
        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.pmid = "1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which reports an error; note the
        # identifier here is a single object rather than a list
        oag_result = {
            "identifier": {
                "id": "PMC1234",
                "type": "pmcid"
            },
            "error": "broken!"
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # should have added the PMID to the re-run
        assert len(oag_rerun) == 1
        assert oag_rerun[0]["id"] == "1234"
        assert oag_rerun[0]["type"] == "pmid"

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
        assert isinstance(r2, models.Record)

        # provenance added, pmcid=error, pmid reprocess
        assert r2.licence_type is None
        assert r2.oag_pmcid == "error"
        provs = [note for _, _, note in r2.provenance]
        assert len(provs) == 1
        assert "PMC1234 - broken!" in provs
示例#14
0
    def test_12_licence_translate(self):
        """The "free-to-read" licence type becomes "non-standard-licence".

        Checked twice: by calling the translation function directly, and on
        the record written by the OAG record callback.
        """
        translated = workflow.translate_licence_type("free-to-read")
        assert translated == "non-standard-licence"

        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which has detected a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "free-to-read",
                "provenance": {
                    "accepted_author_manuscript": False,  # FIXME: provisional
                    "description": "FtR PMC1234"
                }
            }]
        }

        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id))
        assert isinstance(r2, models.Record)

        assert r2.licence_type == "non-standard-licence"
示例#15
0
    def test_12_licence_translate(self):
        """The "free-to-read" licence type becomes "non-standard-licence".

        Checked twice: by calling the translation function directly, and on
        the record written by the OAG record callback.
        """
        translated = workflow.translate_licence_type("free-to-read")
        assert translated == "non-standard-licence"

        # first make ourselves a record that we want to enhance
        job = models.SpreadsheetJob()
        job.save()

        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.save()
        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which has detected a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "free-to-read",
                "provenance": {
                    "accepted_author_manuscript": False,  # FIXME: provisional
                    "description": "FtR PMC1234"
                }
            }]
        }

        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # give the index a moment to catch up
        time.sleep(2)

        # builtin next() instead of the Python-2-only .next() method
        r2 = next(models.Record.get_by_identifier("PMC1234", job.id))
        assert isinstance(r2, models.Record)

        assert r2.licence_type == "non-standard-licence"
示例#16
0
    def test_14_oag_record_callback_duplicate(self):
        """An OAG result is applied to every record sharing the identifier.

        Two records with the same PMCID are saved under one job; after the
        callback both of them must carry the licence, AAM and provenance data.
        """
        # first make ourselves a job that the records will belong to
        job = models.SpreadsheetJob()
        job.save()

        # make two distinct records with the same ids
        for _ in range(2):
            record = models.Record()
            record.upload_id = job.id
            record.pmcid = "PMC1234"
            record.save()

        # presumably lets the index catch up before the callback runs
        time.sleep(2)

        # construct the OAG response object, which has detected a licence
        oag_result = {
            "identifier": [{
                "id": "PMC1234",
                "type": "pmcid"
            }],
            "license": [{
                "type": "cc-by",
                "provenance": {
                    "accepted_author_manuscript": True,
                    "description": "Provenance PMC1234"
                }
            }]
        }

        # call the oag record callback
        oag_rerun = []
        workflow.oag_record_callback(oag_result, oag_rerun, job)

        # give the index a moment to catch up
        time.sleep(2)

        # read the duplicate records out of the index
        records = list(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))

        # there should be 2 of them
        assert len(records) == 2
        for record in records:
            # check the loop variable itself; the original checked a name
            # leaked from the list comprehension, i.e. only the last record
            assert isinstance(record, models.Record)

            # both records should have the same data
            # licence added, source=epmc, pmcid=success, provenance added, aam set
            assert record.licence_type == "cc-by"
            assert record.licence_source == "epmc"
            assert record.oag_pmcid == "success"
            assert record.aam_from_epmc is True
            assert record.aam is True
            provs = [note for _, _, note in record.provenance]
            assert len(provs) == 2
            assert "PMC1234 - Provenance PMC1234" in provs
            assert "Detected AAM status from EPMC web page" in provs
            assert record.oag_complete is True