Пример #1
0
    def check_jobs(cls):
        """
        Check any existing Lantern jobs for progress, and process any that have completed

        Jobs are taken from LanternJob.list_active, using a cutoff of JOB_LOOKUP_DELAY_LANTERN
        (default 3600) before now.  For each job:

        * if the owning account cannot be loaded the job is skipped, so that one orphaned job
          does not abort the checking of all the others
        * if the progress request does not report "success" the job is left untouched
        * if progress is not yet 100 the job is re-saved and stays as it is
        * otherwise the results are requested, each result is crosswalked and saved, and the
          job's status is set to "complete"

        :return: None
        """
        dao = LanternJob()

        delay = app.config.get("JOB_LOOKUP_DELAY_LANTERN", 3600)
        cutoff = dates.before_now(delay)
        gen = dao.list_active(cutoff, keepalive="30m")

        for job in gen:
            acc = MonitorUKAccount.pull(job.account)
            if acc is None:
                # assumes pull returns None for an id it cannot find - TODO confirm.
                # Without the account there is no API key to query Lantern with, so skip this
                # job rather than fail on the attribute access and abandon the remaining jobs
                continue

            lc = client.Lantern(api_key=acc.lantern_api_key)
            prog = lc.get_progress(job.job_id)
            if prog.get("status") != "success":
                # the progress request did not succeed; the job is not saved, so nothing about it changes
                continue

            pc = prog.get("data", {}).get("progress", 0)
            if pc != 100:
                # this will update the last_updated date, which means we won't look at it again for a while
                job.save()
                continue

            # if we get here, the job is complete so we need to retrieve it
            results = lc.get_results(job.job_id)
            if results.get("status") == "success":
                for res in results.get("data", []):
                    enhancement = LanternApi._xwalk(res)
                    enhancement.save()

            # set the job as complete
            # NOTE(review): this also happens when the results request did not report "success",
            # in which case no enhancements were saved for the job - confirm that is intended
            job.status = "complete"
            job.save()
Пример #2
0
    def _needs_lantern_data(cls, apc):
        """
        Decide whether a PublicAPC record is a candidate for a Lantern lookup

        A record whose lantern lookup datestamp is later than the DATA_REFRESH_LANTERN cutoff is
        never a candidate.  Otherwise the record is a candidate as soon as one of the paths listed
        in MISSING_FIELD_TRIGGERS_LANTERN yields nothing, or yields only empty values.

        :param apc: the record to inspect
        :return: True if a lookup could add data to the record, False otherwise
        """
        max_age = app.config.get("DATA_REFRESH_LANTERN", 15552000)
        threshold = dates.before_now(max_age)
        last_lookup = apc.lantern_lookup_datestamp
        looked_up_recently = last_lookup is not None and last_lookup > threshold
        if looked_up_recently:
            return False

        for path in app.config.get("MISSING_FIELD_TRIGGERS_LANTERN", []):
            found = apc.objectpath(path)
            # a trigger fires when the path gives nothing at all, or when every value it
            # gives is empty (empty string, None or False)
            if found is None or not any(found):
                return True

        return False
Пример #3
0
    def test_05_low_quota(self):
        # Check what happens when the user has a low quota on Lantern

        # module-level quota, lowered to a single record for this test
        global QUOTA
        QUOTA = 1

        account = MonitorUKAccount()
        account.email = "*****@*****.**"
        account.lantern_email = "*****@*****.**"
        account.lantern_api_key = "123456789"
        account.save()

        # first candidate: no lookup date recorded, and the publication date is missing
        never_looked_up = PublicAPCFixtureFactory.make_record(account.id, None, None, None)
        del never_looked_up["admin"]["lantern_lookup"]
        del never_looked_up["record"]["rioxxterms:publication_date"]
        PublicAPC(never_looked_up).save()

        # second candidate: the lookup date is a year old, and the publication date is missing
        stale = PublicAPCFixtureFactory.make_record(account.id, None, None, None)
        stale["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))
        del stale["record"]["rioxxterms:publication_date"]
        PublicAPC(stale).save(blocking=True)

        LanternApi.make_new_jobs()

        time.sleep(2)

        # exactly one job record should have been stored ...
        stored_jobs = list(LanternJob().iterall())
        assert len(stored_jobs) == 1

        # ... and exactly one job created, carrying just one of the two candidates
        assert len(CREATED_JOBS) == 1
        created = CREATED_JOBS[0]

        assert created["email"] == "*****@*****.**"
        assert len(created["list"]) == 1
Пример #4
0
    def test_07_check_jobs(self):
        # Ensure that check_jobs polls the right jobs and handles each outcome correctly

        owner = MonitorUKAccount()
        owner.email = "*****@*****.**"
        owner.lantern_email = "*****@*****.**"
        owner.lantern_api_key = "123456789"
        owner.save()

        def new_job(job_id, status, backdated):
            # build (but do not save) a job for the test account; a backdated job has its
            # last_updated date pushed 5000 before now
            job = LanternJob()
            job.job_id = job_id
            job.account = owner.id
            job.status = status
            if backdated:
                job.last_updated = dates.format(dates.before_now(5000))
            return job

        # already complete
        finished = new_job("111111111", "complete", False)
        finished.save()

        # three active, backdated jobs - saved without refreshing last_updated
        unfinished = new_job("222222222", "active", True)
        unfinished.save(updated=False)

        ready = new_job("333333333", "active", True)
        ready.save(updated=False)

        erroring = new_job("444444444", "active", True)
        erroring.save(updated=False)

        # active, but with no backdated last_updated
        recent = new_job("555555555", "active", False)
        recent.save(blocking=True)

        LanternApi.check_jobs()

        # progress should have been requested for the three backdated active jobs only
        assert len(PROGRESS_REQUESTS) == 3
        for polled_id in ("222222222", "333333333", "444444444"):
            assert polled_id in PROGRESS_REQUESTS

        lookup = LanternJob()

        # the job which received an error is exactly as it was
        after_error = lookup.pull(erroring.id)
        assert after_error.last_updated == erroring.last_updated
        assert after_error.status == "active"

        # the job which was not yet complete has a new last_updated date, but is still active
        after_poll = lookup.pull(unfinished.id)
        assert after_poll.last_updated != unfinished.last_updated
        assert after_poll.status == "active"

        # results were requested for a single job
        assert len(RESULTS_REQUESTS) == 1
        assert "333333333" in RESULTS_REQUESTS

        # wait for a bit, so that enhancements have time to go in
        time.sleep(2)

        # a single enhancement was stored, and it holds the crosswalked record
        stored = list(Enhancement().iterall())
        assert len(stored) == 1

        expected = LanternFixtureFactory.xwalk_result()
        assert stored[0].data["record"] == expected["record"]
Пример #5
0
    def test_04_create_job(self):
        # Check that jobs are created for the right records, and only once

        # an account with no Lantern credentials
        acc1 = MonitorUKAccount()
        acc1.email = "*****@*****.**"
        acc1.save()

        # two accounts which do have Lantern credentials
        acc2 = MonitorUKAccount()
        acc2.email = "*****@*****.**"
        acc2.lantern_email = "*****@*****.**"
        acc2.lantern_api_key = "123456789"
        acc2.save()

        acc3 = MonitorUKAccount()
        acc3.email = "*****@*****.**"
        acc3.lantern_email = "*****@*****.**"
        acc3.lantern_api_key = "987654321"
        acc3.save(blocking=True)

        # each of the two Lantern-enabled accounts gets the same four records, in the same order
        for owner in (acc2, acc3):
            # a record which does not need lantern
            up_to_date = PublicAPCFixtureFactory.make_record(owner.id, None, None, None)
            up_to_date["admin"]["lantern_lookup"] = dates.now()
            PublicAPC(up_to_date).save()

            # a record that needs lantern because of a missing field
            never_looked_up = PublicAPCFixtureFactory.make_record(owner.id, None, None, None)
            del never_looked_up["admin"]["lantern_lookup"]
            del never_looked_up["record"]["rioxxterms:publication_date"]
            PublicAPC(never_looked_up).save()

            # a record that needs lantern because it has timed out and has a missing field
            stale = PublicAPCFixtureFactory.make_record(owner.id, None, None, None)
            stale["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))   # a year ago
            del stale["record"]["rioxxterms:publication_date"]
            PublicAPC(stale).save()

            # a record that needs lantern but has no identifiers
            unidentified = PublicAPCFixtureFactory.make_record(owner.id, None, None, None)
            unidentified["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))   # a year ago
            del unidentified["record"]["rioxxterms:publication_date"]
            del unidentified["record"]["dc:identifier"]
            last_record = PublicAPC(unidentified)
            if owner is acc3:
                # only the very last record of all is saved with blocking=True
                last_record.save(blocking=True)
            else:
                last_record.save()

        LanternApi.make_new_jobs()

        time.sleep(2)

        dao = LanternJob()
        assert len(list(dao.iterall())) == 2

        # one job per Lantern-enabled account, each listing two records
        assert len(CREATED_JOBS) == 2
        # NOTE(review): the two addresses compared below are identical literals (they look
        # redacted), so the elif branch cannot be reached as written - restore the real,
        # distinct lantern e-mail addresses of acc2 and acc3
        tally = 0
        for created in CREATED_JOBS:
            if created["email"] == "*****@*****.**":
                tally += 1
                assert len(created["list"]) == 2
            elif created["email"] == "*****@*****.**":
                tally += 10
                assert len(created["list"]) == 2
        assert tally == 11

        # now do the same thing again.  The jobs should not change, as we've already created jobs
        # for all the public records
        LanternApi.make_new_jobs()
        time.sleep(2)
        assert len(list(dao.iterall())) == 2
Пример #6
0
    def test_01_needs_lantern(self):
        # Check that we can tell when an object needs to be looked up in Lantern

        # a record for which no lookup is needed
        apc = {
            "record" : {
                "rioxxterms:publication_date" : "2001-01-01",
                "rioxxterms:version" : "AAM",
                "dc:source" : {
                    "name" : "Journal Title",
                    "identifier" : [
                        {"type" : "issn", "id" : "XXXX-XXXX"}
                    ],
                    "oa_type" : "hybrid",
                    "self_archiving" : {
                        "preprint" : {
                            "embargo" : 10,
                            "policy" : "can"
                        },
                        "postprint" : {
                            "embargo" : 20,
                            "policy" : "cannot"
                        },
                        "publisher" : {
                            "embargo" : 30,
                            "policy" : "maybe"
                        }
                    }
                },
                "rioxxterms:project" : [
                    {"funder_name" : "BBSRC"}
                ],
                "ali:license_ref" : [
                    { "type" : "CC BY" }
                ],
                "jm:repository" : [
                    {"repo_name" : "arXiv"}
                ]
            }
        }
        assert LanternApi._needs_lantern_data(PublicAPC(apc)) is False

        # marker meaning "delete the key" rather than "replace its value"
        removed = object()

        # each entry damages exactly one field of a fresh copy of the record:
        # (path below "record", replacement value or the removal marker)
        damage = [
            (("rioxxterms:publication_date",), ""),
            (("rioxxterms:version",), ""),
            (("dc:source", "name"), ""),
            (("dc:source", "identifier", 0, "type"), "other"),
            (("dc:source", "oa_type"), removed),
            (("dc:source", "self_archiving", "preprint", "embargo"), None),
            (("dc:source", "self_archiving", "preprint", "policy"), None),
            (("dc:source", "self_archiving", "postprint", "embargo"), None),
            (("dc:source", "self_archiving", "postprint", "policy"), None),
            (("dc:source", "self_archiving", "publisher", "embargo"), None),
            (("dc:source", "self_archiving", "publisher", "policy"), None),
            (("rioxxterms:project", 0, "funder_name"), removed),
            (("ali:license_ref", 0, "type"), removed),
            (("jm:repository", 0, "repo_name"), removed),
        ]

        for path, replacement in damage:
            clone = deepcopy(apc)

            # walk down to the container which holds the final key
            holder = clone["record"]
            for step in path[:-1]:
                holder = holder[step]

            if replacement is removed:
                del holder[path[-1]]
            else:
                holder[path[-1]] = replacement

            assert LanternApi._needs_lantern_data(PublicAPC(clone)) is True

        # now check that having a date cut-off works
        dated = {
            "admin" : {
                "lantern_lookup" : dates.now()
            }
        }
        assert LanternApi._needs_lantern_data(PublicAPC(dated)) is False

        dated["admin"]["lantern_lookup"] = dates.format(dates.before_now(31104000))   # a year ago
        assert LanternApi._needs_lantern_data(PublicAPC(dated)) is True