def test_14_oag_record_callback_duplicate(self):
    """Check that one OAG result updates every record sharing its identifier.

    Two records with the same PMCID are saved against one job and a single
    cc-by OAG result is passed to ``workflow.oag_record_callback``.  Both
    records read back from the index must then carry the licence, the AAM
    flags, two provenance notes and ``oag_complete`` set to True.
    """
    # first make ourselves a job that the records will belong to
    job = models.SpreadsheetJob()
    job.save()

    # make two distinct records with the same ids
    for _ in range(2):
        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.save()

    # wait before calling the callback, presumably so the saves are indexed
    time.sleep(2)

    # construct the OAG response object, which has detected a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "cc-by",
            "provenance": {
                "accepted_author_manuscript": True,
                "description": "Provenance PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # give the index a moment to catch up
    time.sleep(2)

    # read the duplicate records out of the index
    records = list(
        models.Record.get_by_identifier("PMC1234", job.id, "pmcid")
    )

    # there should be 2 of them
    assert len(records) == 2

    for record in records:
        # type-check the loop variable itself so every record is verified
        assert isinstance(record, models.Record)

        # both records should have the same data
        # licence added, source=epmc, pmcid=success, provenance added, aam set
        assert record.licence_type == "cc-by"
        assert record.licence_source == "epmc"
        assert record.oag_pmcid == "success"
        assert record.aam_from_epmc is True
        assert record.aam is True

        provs = [n for b, w, n in record.provenance]
        assert len(provs) == 2
        assert "PMC1234 - Provenance PMC1234" in provs
        assert "Detected AAM status from EPMC web page" in provs

        assert record.oag_complete is True
def test_03_handle_oag_response_01_pmcid_success(self):
    """Check the OAG callback for a PMCID lookup that returns a licence.

    A record holding only a PMCID is saved, then a cc-by OAG result flagged
    as an accepted author manuscript is passed to
    ``workflow.oag_record_callback``.  Nothing should be queued for re-run,
    and the stored record should gain the licence (source "epmc"), the
    "success" PMCID status, the AAM flags and two provenance notes.
    """
    # first make ourselves a job/record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which has detected a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "cc-by",
            "provenance": {
                "accepted_author_manuscript": True,  # FIXME: provisional
                "description": "Provenance PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should not have added anything to the rerun
    assert len(oag_rerun) == 0

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
    assert isinstance(r2, models.Record)
    assert r2.id == record.id
    assert r2.pmcid == "PMC1234"

    # licence added, source=epmc, pmcid=success, provenance added, aam set
    assert r2.licence_type == "cc-by"
    assert r2.licence_source == "epmc"
    assert r2.oag_pmcid == "success"
    assert r2.aam_from_epmc is True
    assert r2.aam is True

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 2
    assert "PMC1234 - Provenance PMC1234" in provs
    assert "Detected AAM status from EPMC web page" in provs
def test_03_handle_oag_response_01_pmcid_success(self):
    """Check the OAG callback for a PMCID lookup that returns a licence.

    A record holding only a PMCID is saved, then a cc-by OAG result flagged
    as an accepted author manuscript is passed to
    ``workflow.oag_record_callback``.  Nothing should be queued for re-run,
    and the stored record should gain the licence (source "epmc"), the
    "success" PMCID status, the AAM flags and two provenance notes.
    """
    # first make ourselves a job/record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which has detected a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "cc-by",
            "provenance": {
                "accepted_author_manuscript": True,  # FIXME: provisional
                "description": "Provenance PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should not have added anything to the rerun
    assert len(oag_rerun) == 0

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
    assert isinstance(r2, models.Record)
    assert r2.id == record.id
    assert r2.pmcid == "PMC1234"

    # licence added, source=epmc, pmcid=success, provenance added, aam set
    assert r2.licence_type == "cc-by"
    assert r2.licence_source == "epmc"
    assert r2.oag_pmcid == "success"
    assert r2.aam_from_epmc is True
    assert r2.aam is True

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 2
    assert "PMC1234 - Provenance PMC1234" in provs
    assert "Detected AAM status from EPMC web page" in provs
def test_03_handle_oag_response_02_pmcid_fto(self):
    """Check the OAG callback for a PMCID lookup that fails to get a licence.

    A record with a PMCID and a DOI is saved, then a
    "failed-to-obtain-license" OAG result for the PMCID is passed to
    ``workflow.oag_record_callback``.  The DOI should be queued for re-run,
    and the stored record should have no licence, PMCID status "fto", the
    AAM flags set and two provenance notes.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.doi = "10.1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which failed to obtain a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "failed-to-obtain-license",
            "provenance": {
                "accepted_author_manuscript": True,  # FIXME: provisional
                "description": "FTO PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should have added the DOI to the re-run
    assert len(oag_rerun) == 1
    assert oag_rerun[0]["id"] == "10.1234"
    assert oag_rerun[0]["type"] == "doi"

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
    assert isinstance(r2, models.Record)

    # provenance added, pmcid=fto, aam set
    assert r2.licence_type is None
    assert r2.oag_pmcid == "fto"
    assert r2.aam_from_epmc is True
    assert r2.aam is True

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 2
    assert "PMC1234 - FTO PMC1234" in provs
    assert "Detected AAM status from EPMC web page" in provs
def test_03_handle_oag_response_02_pmcid_fto(self):
    """Check the OAG callback for a PMCID lookup that fails to get a licence.

    A record with a PMCID and a DOI is saved, then a
    "failed-to-obtain-license" OAG result for the PMCID is passed to
    ``workflow.oag_record_callback``.  The DOI should be queued for re-run,
    and the stored record should have no licence, PMCID status "fto", the
    AAM flags set and two provenance notes.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.doi = "10.1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which failed to obtain a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "failed-to-obtain-license",
            "provenance": {
                "accepted_author_manuscript": True,  # FIXME: provisional
                "description": "FTO PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should have added the DOI to the re-run
    assert len(oag_rerun) == 1
    assert oag_rerun[0]["id"] == "10.1234"
    assert oag_rerun[0]["type"] == "doi"

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
    assert isinstance(r2, models.Record)

    # provenance added, pmcid=fto, aam set
    assert r2.licence_type is None
    assert r2.oag_pmcid == "fto"
    assert r2.aam_from_epmc is True
    assert r2.aam is True

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 2
    assert "PMC1234 - FTO PMC1234" in provs
    assert "Detected AAM status from EPMC web page" in provs
def test_03_handle_oag_response_04_pmcid_no_change(self):
    """Check the OAG callback leaves an already-populated record untouched.

    The record is saved with a licence type, ``aam`` and ``aam_from_xml``
    already set.  A "failed-to-obtain-license" OAG result for its PMCID is
    then passed to ``workflow.oag_record_callback``; nothing should be
    queued for re-run and the stored record should show no changes and no
    provenance notes.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.licence_type = "CC BY"
    record.aam = True
    record.aam_from_xml = True
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which failed to obtain a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "failed-to-obtain-license",
            "provenance": {
                "accepted_author_manuscript": False,  # FIXME: provisional
                "description": "You won't see this PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should not have added anything to the rerun
    assert len(oag_rerun) == 0

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
    assert isinstance(r2, models.Record)

    # expecting no changes
    assert r2.licence_type == "CC BY"
    assert r2.licence_source is None
    assert r2.oag_pmcid is None
    assert r2.aam_from_epmc is False
    assert r2.aam is True

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 0
def test_03_handle_oag_response_04_pmcid_no_change(self):
    """Check the OAG callback leaves an already-populated record untouched.

    The record is saved with a licence type, ``aam`` and ``aam_from_xml``
    already set.  A "failed-to-obtain-license" OAG result for its PMCID is
    then passed to ``workflow.oag_record_callback``; nothing should be
    queued for re-run and the stored record should show no changes and no
    provenance notes.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.licence_type = "CC BY"
    record.aam = True
    record.aam_from_xml = True
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which failed to obtain a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "failed-to-obtain-license",
            "provenance": {
                "accepted_author_manuscript": False,  # FIXME: provisional
                "description": "You won't see this PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should not have added anything to the rerun
    assert len(oag_rerun) == 0

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
    assert isinstance(r2, models.Record)

    # expecting no changes
    assert r2.licence_type == "CC BY"
    assert r2.licence_source is None
    assert r2.oag_pmcid is None
    assert r2.aam_from_epmc is False
    assert r2.aam is True

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 0
def test_03_handle_oag_response_07_doi_fto(self):
    """Check the OAG callback for a DOI lookup that fails to get a licence.

    A record with a DOI and a PMID is saved, then a
    "failed-to-obtain-license" OAG result for the DOI is passed to
    ``workflow.oag_record_callback``.  The PMID should be queued for re-run,
    and the stored record should have no licence, DOI status "fto", no AAM
    value and a single provenance note.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.doi = "10.1234"
    record.pmid = "1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which failed to obtain a licence
    oag_result = {
        "identifier": [{
            "id": "10.1234",
            "type": "doi"
        }],
        "license": [{
            "type": "failed-to-obtain-license",
            "provenance": {
                "description": "FTO 10.1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should have added the PMID to the re-run
    assert len(oag_rerun) == 1
    assert oag_rerun[0]["id"] == "1234"
    assert oag_rerun[0]["type"] == "pmid"

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("10.1234", job.id))
    assert isinstance(r2, models.Record)

    # provenance added, doi=fto, pmid reprocess
    assert r2.licence_type is None
    assert r2.oag_doi == "fto"
    assert r2.aam is None

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 1
    assert "10.1234 - FTO 10.1234" in provs
def test_03_handle_oag_response_07_doi_fto(self):
    """Check the OAG callback for a DOI lookup that fails to get a licence.

    A record with a DOI and a PMID is saved, then a
    "failed-to-obtain-license" OAG result for the DOI is passed to
    ``workflow.oag_record_callback``.  The PMID should be queued for re-run,
    and the stored record should have no licence, DOI status "fto", no AAM
    value and a single provenance note.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.doi = "10.1234"
    record.pmid = "1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which failed to obtain a licence
    oag_result = {
        "identifier": [{
            "id": "10.1234",
            "type": "doi"
        }],
        "license": [{
            "type": "failed-to-obtain-license",
            "provenance": {
                "description": "FTO 10.1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should have added the PMID to the re-run
    assert len(oag_rerun) == 1
    assert oag_rerun[0]["id"] == "1234"
    assert oag_rerun[0]["type"] == "pmid"

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("10.1234", job.id))
    assert isinstance(r2, models.Record)

    # provenance added, doi=fto, pmid reprocess
    assert r2.licence_type is None
    assert r2.oag_doi == "fto"
    assert r2.aam is None

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 1
    assert "10.1234 - FTO 10.1234" in provs
def test_03_handle_oag_response_06_doi_success(self):
    """Check the OAG callback for a DOI lookup that returns a licence.

    A record holding only a DOI is saved, then a cc-by OAG result for that
    DOI is passed to ``workflow.oag_record_callback``.  Nothing should be
    queued for re-run, and the stored record should gain the licence
    (source "publisher"), DOI status "success", no AAM value and a single
    provenance note.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.doi = "10.1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which has detected a licence
    oag_result = {
        "identifier": [{
            "id": "10.1234",
            "type": "doi"
        }],
        "license": [{
            "type": "cc-by",
            "provenance": {
                "description": "Provenance 10.1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should not have added anything to the rerun
    assert len(oag_rerun) == 0

    # give the index a moment to catch up
    time.sleep(2)

    # the identifier type argument is deliberately left out of this lookup;
    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("10.1234", job.id))
    assert isinstance(r2, models.Record)

    # licence added, source=publisher, doi=success, provenance added
    assert r2.licence_type == "cc-by"
    assert r2.licence_source == "publisher"
    assert r2.oag_doi == "success"
    assert r2.aam is None

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 1
    assert "10.1234 - Provenance 10.1234" in provs
def test_03_handle_oag_response_09_pmid_success(self):
    """Check the OAG callback for a PMID lookup that returns a licence.

    A record holding only a PMID is saved, then a cc-by OAG result for that
    PMID is passed to ``workflow.oag_record_callback``.  Nothing should be
    queued for re-run, and the stored record should gain the licence
    (source "publisher"), PMID status "success", no AAM value and a single
    provenance note.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmid = "1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which has detected a licence
    oag_result = {
        "identifier": [{
            "id": "1234",
            "type": "pmid"
        }],
        "license": [{
            "type": "cc-by",
            "provenance": {
                "description": "Provenance 1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should not have added anything to the rerun
    assert len(oag_rerun) == 0

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("1234", job.id, "pmid"))
    assert isinstance(r2, models.Record)

    # licence added, source=publisher, pmid=success, provenance added
    assert r2.licence_type == "cc-by"
    assert r2.licence_source == "publisher"
    assert r2.oag_pmid == "success"
    assert r2.aam is None

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 1
    assert "1234 - Provenance 1234" in provs
def test_03_handle_oag_response_05_pmcid_error(self):
    """Check the OAG callback for a PMCID lookup that returns an error.

    A record with a PMCID and a PMID is saved, then an OAG result carrying
    an "error" message for the PMCID is passed to
    ``workflow.oag_record_callback``.  The PMID should be queued for re-run,
    and the stored record should have no licence, PMCID status "error" and
    a single provenance note containing the error text.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.pmid = "1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which reports an error; note that
    # in this fixture "identifier" is a single dict rather than a list
    oag_result = {
        "identifier": {
            "id": "PMC1234",
            "type": "pmcid"
        },
        "error": "broken!"
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should have added the PMID to the re-run
    assert len(oag_rerun) == 1
    assert oag_rerun[0]["id"] == "1234"
    assert oag_rerun[0]["type"] == "pmid"

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
    assert isinstance(r2, models.Record)

    # provenance added, pmcid=error, pmid reprocess
    assert r2.licence_type is None
    assert r2.oag_pmcid == "error"

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 1
    assert "PMC1234 - broken!" in provs
def test_03_handle_oag_response_05_pmcid_error(self):
    """Check the OAG callback for a PMCID lookup that returns an error.

    A record with a PMCID and a PMID is saved, then an OAG result carrying
    an "error" message for the PMCID is passed to
    ``workflow.oag_record_callback``.  The PMID should be queued for re-run,
    and the stored record should have no licence, PMCID status "error" and
    a single provenance note containing the error text.
    """
    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.pmid = "1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which reports an error; note that
    # in this fixture "identifier" is a single dict rather than a list
    oag_result = {
        "identifier": {
            "id": "PMC1234",
            "type": "pmcid"
        },
        "error": "broken!"
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # should have added the PMID to the re-run
    assert len(oag_rerun) == 1
    assert oag_rerun[0]["id"] == "1234"
    assert oag_rerun[0]["type"] == "pmid"

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id, "pmcid"))
    assert isinstance(r2, models.Record)

    # provenance added, pmcid=error, pmid reprocess
    assert r2.licence_type is None
    assert r2.oag_pmcid == "error"

    provs = [n for b, w, n in r2.provenance]
    assert len(provs) == 1
    assert "PMC1234 - broken!" in provs
def test_12_licence_translate(self):
    """Check that the "free-to-read" licence type is translated on storage.

    First asserts that ``workflow.translate_licence_type`` maps
    "free-to-read" to "non-standard-licence", then passes a "free-to-read"
    OAG result through ``workflow.oag_record_callback`` and checks the
    stored record's licence type is the translated value.
    """
    assert workflow.translate_licence_type(
        "free-to-read") == "non-standard-licence"

    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which has detected a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "free-to-read",
            "provenance": {
                "accepted_author_manuscript": False,  # FIXME: provisional
                "description": "FtR PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id))
    assert isinstance(r2, models.Record)
    assert r2.licence_type == "non-standard-licence"
def test_12_licence_translate(self):
    """Check that the "free-to-read" licence type is translated on storage.

    First asserts that ``workflow.translate_licence_type`` maps
    "free-to-read" to "non-standard-licence", then passes a "free-to-read"
    OAG result through ``workflow.oag_record_callback`` and checks the
    stored record's licence type is the translated value.
    """
    assert workflow.translate_licence_type(
        "free-to-read") == "non-standard-licence"

    # first make ourselves a record that we want to enhance
    job = models.SpreadsheetJob()
    job.save()

    record = models.Record()
    record.upload_id = job.id
    record.pmcid = "PMC1234"
    record.save()

    # wait before calling the callback, presumably so the save is indexed
    time.sleep(2)

    # construct the OAG response object, which has detected a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "free-to-read",
            "provenance": {
                "accepted_author_manuscript": False,  # FIXME: provisional
                "description": "FtR PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # give the index a moment to catch up
    time.sleep(2)

    # builtin next() works with both Python 2.6+ and Python 3 iterators
    r2 = next(models.Record.get_by_identifier("PMC1234", job.id))
    assert isinstance(r2, models.Record)
    assert r2.licence_type == "non-standard-licence"
def test_14_oag_record_callback_duplicate(self):
    """Check that one OAG result updates every record sharing its identifier.

    Two records with the same PMCID are saved against one job and a single
    cc-by OAG result is passed to ``workflow.oag_record_callback``.  Both
    records read back from the index must then carry the licence, the AAM
    flags, two provenance notes and ``oag_complete`` set to True.
    """
    # first make ourselves a job that the records will belong to
    job = models.SpreadsheetJob()
    job.save()

    # make two distinct records with the same ids
    for _ in range(2):
        record = models.Record()
        record.upload_id = job.id
        record.pmcid = "PMC1234"
        record.save()

    # wait before calling the callback, presumably so the saves are indexed
    time.sleep(2)

    # construct the OAG response object, which has detected a licence
    oag_result = {
        "identifier": [{
            "id": "PMC1234",
            "type": "pmcid"
        }],
        "license": [{
            "type": "cc-by",
            "provenance": {
                "accepted_author_manuscript": True,
                "description": "Provenance PMC1234"
            }
        }]
    }

    # call the oag record callback
    oag_rerun = []
    workflow.oag_record_callback(oag_result, oag_rerun, job)

    # give the index a moment to catch up
    time.sleep(2)

    # read the duplicate records out of the index
    records = list(
        models.Record.get_by_identifier("PMC1234", job.id, "pmcid")
    )

    # there should be 2 of them
    assert len(records) == 2

    for record in records:
        # type-check the loop variable itself so every record is verified
        assert isinstance(record, models.Record)

        # both records should have the same data
        # licence added, source=epmc, pmcid=success, provenance added, aam set
        assert record.licence_type == "cc-by"
        assert record.licence_source == "epmc"
        assert record.oag_pmcid == "success"
        assert record.aam_from_epmc is True
        assert record.aam is True

        provs = [n for b, w, n in record.provenance]
        assert len(provs) == 2
        assert "PMC1234 - Provenance PMC1234" in provs
        assert "Detected AAM status from EPMC web page" in provs

        assert record.oag_complete is True