def check_jobs(cls):
    """
    Poll Lantern for each job that is due a check, and import the results of any job that has finished.

    Jobs are selected with LanternJob.list_active, using a cut-off of JOB_LOOKUP_DELAY_LANTERN
    (default 3600) before now.  For each job returned:

    * if the progress request does not report "success", the job is left untouched
    * if progress is not yet 100, the job is re-saved and nothing else is done for it
    * otherwise the results are fetched, each one is crosswalked to an enhancement and saved,
      and the job is marked "complete"; if the results request does not report "success" the
      job is left untouched

    :return: None
    """
    dao = LanternJob()
    delay = app.config.get("JOB_LOOKUP_DELAY_LANTERN", 3600)
    cutoff = dates.before_now(delay)

    for job in dao.list_active(cutoff, keepalive="30m"):
        # each job is checked with the API key of the account that owns it
        account = MonitorUKAccount.pull(job.account)
        lantern = client.Lantern(api_key=account.lantern_api_key)

        progress = lantern.get_progress(job.job_id)
        if progress.get("status") != "success":
            continue

        percent = progress.get("data", {}).get("progress", 0)
        if percent != 100:
            # saving updates the last_updated date, which means we won't look at this job again for a while
            job.save()
            continue

        # the job has finished, so retrieve what it produced
        results = lantern.get_results(job.job_id)
        if results.get("status") != "success":
            continue

        for result in results.get("data", []):
            LanternApi._xwalk(result).save()

        # only mark the job complete once its results have been stored
        job.status = "complete"
        job.save()
def _needs_lantern_data(cls, apc):
    """
    Decide whether a PublicAPC record could benefit from a Lantern lookup.

    A record whose lantern_lookup_datestamp is later than the DATA_REFRESH_LANTERN cut-off
    (default 15552000 before now) is never looked up again.  Otherwise the record qualifies as
    soon as one of the object paths in MISSING_FIELD_TRIGGERS_LANTERN yields nothing, or yields
    only falsy values.

    :param apc: the PublicAPC record to inspect
    :return: True if the record should be looked up in Lantern, False otherwise
    """
    max_age = app.config.get("DATA_REFRESH_LANTERN", 15552000)
    threshold = dates.before_now(max_age)
    last_lookup = apc.lantern_lookup_datestamp
    looked_up_recently = last_lookup is not None and last_lookup > threshold
    if looked_up_recently:
        return False

    for path in app.config.get("MISSING_FIELD_TRIGGERS_LANTERN", []):
        found = apc.objectpath(path)
        # a value only counts if it is truthy: not empty string, None or False
        # NOTE(review): 0 and empty containers are falsy too, so they are treated as missing - confirm intended
        if found is None or not any(found):
            return True

    return False
def test_05_low_quota(self):
    # Check what happens when the user has a low quota on Lantern
    global QUOTA
    QUOTA = 1

    account = MonitorUKAccount()
    account.email = "*****@*****.**"
    account.lantern_email = "*****@*****.**"
    account.lantern_api_key = "123456789"
    account.save()

    # first record: needs lantern because of a missing field (and has never been looked up)
    never_looked_up = PublicAPCFixtureFactory.make_record(account.id, None, None, None)
    del never_looked_up["admin"]["lantern_lookup"]
    del never_looked_up["record"]["rioxxterms:publication_date"]
    PublicAPC(never_looked_up).save()

    # second record: needs lantern because its lookup has timed out and it has a missing field
    timed_out = PublicAPCFixtureFactory.make_record(account.id, None, None, None)
    timed_out["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))    # a year ago
    del timed_out["record"]["rioxxterms:publication_date"]
    PublicAPC(timed_out).save(blocking=True)

    LanternApi.make_new_jobs()
    time.sleep(2)

    # two records qualify, but with QUOTA = 1 we expect a single job holding a single record
    stored_jobs = list(LanternJob().iterall())
    assert len(stored_jobs) == 1

    assert len(CREATED_JOBS) == 1
    created = CREATED_JOBS[0]
    assert created["email"] == "*****@*****.**"
    assert len(created["list"]) == 1
def test_07_check_jobs(self):
    # Ensure that we can check existing jobs correctly
    account = MonitorUKAccount()
    account.email = "*****@*****.**"
    account.lantern_email = "*****@*****.**"
    account.lantern_api_key = "123456789"
    account.save()

    def new_job(job_id, status):
        # build (but do not save) a job owned by the test account
        lj = LanternJob()
        lj.job_id = job_id
        lj.account = account.id
        lj.status = status
        return lj

    # a job which is already complete
    new_job("111111111", "complete").save()

    # three active jobs whose last_updated is back-dated by 5000, and which are saved
    # with updated=False
    backdated = {}
    for job_id in ("222222222", "333333333", "444444444"):
        lj = new_job(job_id, "active")
        lj.last_updated = dates.format(dates.before_now(5000))
        lj.save(updated=False)
        backdated[job_id] = lj

    # an active job saved normally, i.e. without a back-dated last_updated
    new_job("555555555", "active").save(blocking=True)

    LanternApi.check_jobs()

    # check that the progress requests we expected were made: only the back-dated active jobs
    assert len(PROGRESS_REQUESTS) == 3
    for job_id in ("222222222", "333333333", "444444444"):
        assert job_id in PROGRESS_REQUESTS

    dao = LanternJob()

    # check that the job which received an error was just ignored
    errored = backdated["444444444"]
    ignored = dao.pull(errored.id)
    assert ignored.last_updated == errored.last_updated
    assert ignored.status == "active"

    # check that the record which was not complete was touched
    unfinished = backdated["222222222"]
    touched = dao.pull(unfinished.id)
    assert touched.last_updated != unfinished.last_updated
    assert touched.status == "active"

    # check that results were requested only for one item
    assert len(RESULTS_REQUESTS) == 1
    assert "333333333" in RESULTS_REQUESTS

    # wait for a bit, so that enhancements have time to go in
    time.sleep(2)

    # check that an enhancement was registered, and that it carries the crosswalked record
    enhancements = list(Enhancement().iterall())
    assert len(enhancements) == 1

    expected = LanternFixtureFactory.xwalk_result()
    assert enhancements[0].data["record"] == expected["record"]
def test_04_create_job(self):
    # Check we can create jobs correctly

    # an account with no Lantern credentials set
    plain = MonitorUKAccount()
    plain.email = "*****@*****.**"
    plain.save()

    # two accounts which do have Lantern credentials
    first = MonitorUKAccount()
    first.email = "*****@*****.**"
    first.lantern_email = "*****@*****.**"
    first.lantern_api_key = "123456789"
    first.save()

    second = MonitorUKAccount()
    second.email = "*****@*****.**"
    second.lantern_email = "*****@*****.**"
    second.lantern_api_key = "987654321"
    second.save(blocking=True)

    def a_year_ago():
        return dates.format(dates.before_now(31104000))

    def build(owner_id, lookup, drop_pub_date, drop_identifiers=False):
        # make a fixture record for the owner; lookup=None removes the lookup date altogether
        source = PublicAPCFixtureFactory.make_record(owner_id, None, None, None)
        if lookup is None:
            del source["admin"]["lantern_lookup"]
        else:
            source["admin"]["lantern_lookup"] = lookup
        if drop_pub_date:
            del source["record"]["rioxxterms:publication_date"]
        if drop_identifiers:
            del source["record"]["dc:identifier"]
        return PublicAPC(source)

    # each credentialed account gets the same four records, in the same order
    owners = (first, second)
    for owner in owners:
        # a record which does not need lantern
        build(owner.id, dates.now(), False).save()

        # a record that needs lantern because of a missing field
        build(owner.id, None, True).save()

        # a record that needs lantern because it has timed out and has a missing field
        build(owner.id, a_year_ago(), True).save()

        # a record that needs lantern but has no identifiers
        unidentified = build(owner.id, a_year_ago(), True, True)
        if owner is owners[-1]:
            # the very last save is the only blocking one
            unidentified.save(blocking=True)
        else:
            unidentified.save()

    LanternApi.make_new_jobs()
    time.sleep(2)

    # one job per credentialed account
    dao = LanternJob()
    assert len(list(dao.iterall())) == 2
    assert len(CREATED_JOBS) == 2

    # each job should carry the two records which need lantern and have identifiers; the
    # tally adds 1 for one address and 10 for the other, so 11 means each was seen once
    # NOTE(review): the two address literals below are identical in this copy of the file, so
    # the elif branch cannot be reached as written - confirm against the real addresses
    tally = 0
    for created in CREATED_JOBS:
        if created["email"] == "*****@*****.**":
            tally += 1
            assert len(created["list"]) == 2
        elif created["email"] == "*****@*****.**":
            tally += 10
            assert len(created["list"]) == 2
    assert tally == 11

    # now do the same thing again.  The jobs should not change, as we've already created jobs
    # for all the public records
    LanternApi.make_new_jobs()
    time.sleep(2)

    assert len(list(dao.iterall())) == 2
def test_01_needs_lantern(self):
    # Check that we can tell when an object needs to be looked up in Lantern

    # a record in which every field this test goes on to damage is populated
    apc = {
        "record": {
            "rioxxterms:publication_date": "2001-01-01",
            "rioxxterms:version": "AAM",
            "dc:source": {
                "name": "Journal Title",
                "identifier": [
                    {"type": "issn", "id": "XXXX-XXXX"}
                ],
                "oa_type": "hybrid",
                "self_archiving": {
                    "preprint": {
                        "embargo": 10,
                        "policy": "can"
                    },
                    "postprint": {
                        "embargo": 20,
                        "policy": "cannot"
                    },
                    "publisher": {
                        "embargo": 30,
                        "policy": "maybe"
                    }
                }
            },
            "rioxxterms:project": [
                {"funder_name": "BBSRC"}
            ],
            "ali:license_ref": [
                {"type": "CC BY"}
            ],
            "jm:repository": [
                {"repo_name": "arXiv"}
            ]
        }
    }

    # the untouched record has everything, so no lookup is needed
    assert LanternApi._needs_lantern_data(PublicAPC(apc)) is False

    # Each case damages exactly one field of a fresh copy of the record.  A case is
    # (path below "record", remove?, replacement value); the value is unused when removing.
    cases = [
        (("rioxxterms:publication_date",), False, ""),
        (("rioxxterms:version",), False, ""),
        (("dc:source", "name"), False, ""),
        (("dc:source", "identifier", 0, "type"), False, "other"),
        (("dc:source", "oa_type"), True, None),
        (("dc:source", "self_archiving", "preprint", "embargo"), False, None),
        (("dc:source", "self_archiving", "preprint", "policy"), False, None),
        (("dc:source", "self_archiving", "postprint", "embargo"), False, None),
        (("dc:source", "self_archiving", "postprint", "policy"), False, None),
        (("dc:source", "self_archiving", "publisher", "embargo"), False, None),
        (("dc:source", "self_archiving", "publisher", "policy"), False, None),
        (("rioxxterms:project", 0, "funder_name"), True, None),
        (("ali:license_ref", 0, "type"), True, None),
        (("jm:repository", 0, "repo_name"), True, None),
    ]
    for path, remove, value in cases:
        clone = deepcopy(apc)
        # walk down to the container holding the final key
        node = clone["record"]
        for step in path[:-1]:
            node = node[step]
        if remove:
            del node[path[-1]]
        else:
            node[path[-1]] = value
        needs = LanternApi._needs_lantern_data(PublicAPC(clone))
        assert needs is True, path

    # now check that having a date cut-off works: a record with no "record" section at all
    # is still skipped while its lookup date is recent ...
    apc = {
        "admin": {
            "lantern_lookup": dates.now()
        }
    }
    assert LanternApi._needs_lantern_data(PublicAPC(apc)) is False

    # ... but is picked up again once the lookup date is a year old
    apc["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))    # a year ago
    assert LanternApi._needs_lantern_data(PublicAPC(apc)) is True