def applications_inconsistencies(outfile_later, outfile_missing, conn):
    with codecs.open(outfile_later, "wb", "utf-8") as f, codecs.open(outfile_missing, "wb", "utf-8") as g:

        out_later = csv.writer(f)
        out_later.writerow(["Application ID", "Application Last Updated", "Latest Provenance Recorded", "Difference"])

        out_missing = UnicodeWriter(g)
        out_missing.writerow(["Application ID", "Application Last Manual Update", "Latest Provenance Record", "ISSNs", "Title"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            counter += 1
            application = Suggestion(**result)
            print counter, application.id

            # Part 1 - later provenance records exist
            latest_prov = Provenance.get_latest_by_resource_id(application.id)
            if latest_prov is not None:
                lustamp = adjust_timestamp(application.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
                created = latest_prov.created_date
                pstamp = latest_prov.created_timestamp
                td = pstamp - lustamp
                diff = td.total_seconds()

                if diff > THRESHOLD:
                    out_later.writerow([application.id, application.last_updated, created, diff])

            # Part 2 - missing journals
            if application.application_status == constants.APPLICATION_STATUS_ACCEPTED:
                missing = False

                # find the matching journals by issn or by title
                matching_journals = Journal.find_by_issn(application.bibjson().issns())
                if len(matching_journals) == 0:
                    # Have another go, find by title
                    matching_journals = Journal.find_by_title(application.bibjson().title)

                # if there are no matching journals, it is missing.
                if len(matching_journals) == 0:
                    missing = True
                else:
                    # if there are matching journals, find out if any of them are in the doaj.  If none, then journal is still missing
                    those_in_doaj = len([j for j in matching_journals if j.is_in_doaj()])
                    if those_in_doaj == 0:
                        missing = True

                # if the journal is missing, record it
                if missing:
                    created = ""
                    if latest_prov is not None:
                        created = latest_prov.created_date
                    out_missing.writerow([application.id, application.last_manual_update, created, " ".join(application.bibjson().issns()), application.bibjson().title])

        print "processed", counter, "suggestions"
示例#2
0
    def make_application_spread(cls, desired_output, period):
        """
        Build a list of Suggestion objects whose created dates and countries
        reproduce the counts laid out in a report-style table.

        The first row of the table is the header; its first cell is discarded and
        each remaining cell is parsed as "%Y-%m" (period == "month") or "%Y"
        (period == "year") to give a [start, end) date range.  Every following row
        holds a country in its first cell and one count per header column.

        :param desired_output: list of rows as described above (it is deep-copied, not modified)
        :param period: "month" or "year"; any other value yields (None, None) ranges
        :return: list of Suggestion objects, one per unit of count in the table
        """
        # operate on a private copy, since rows are trimmed in place below
        table = deepcopy(desired_output)
        column_labels = table[0]
        del table[0]
        del column_labels[0]

        def _bounds(label):
            # translate one header label into formatted (start, end) boundaries
            if period == "month":
                first = dates.parse(label, "%Y-%m")
                carry, next_month = divmod(first.month + 1, 12)
                if next_month == 0:
                    # November -> December: divmod wrapped, so undo the year carry
                    next_month = 12
                    carry = carry - 1
                following = datetime(first.year + carry, next_month, 1)
                return dates.format(first), dates.format(following)
            if period == "year":
                first = dates.parse(label, "%Y")
                following = datetime(first.year + 1, 1, 1)
                return dates.format(first), dates.format(following)
            return None, None

        ranges = [_bounds(label) for label in column_labels]

        apps = []
        for row in table:
            country = row[0]
            del row[0]
            for position, count in enumerate(row):
                start, end = ranges[position]
                for _ in range(count):
                    suggestion = Suggestion()
                    suggestion.set_created(dates.random_date(start, end))
                    suggestion.bibjson().country = country
                    apps.append(suggestion)

        return apps
def applications_inconsistencies(outfile_later, outfile_missing, conn):
    with codecs.open(outfile_later, "wb",
                     "utf-8") as f, codecs.open(outfile_missing, "wb",
                                                "utf-8") as g:

        out_later = csv.writer(f)
        out_later.writerow([
            "Application ID", "Application Last Updated",
            "Latest Provenance Recorded", "Difference"
        ])

        out_missing = UnicodeWriter(g)
        out_missing.writerow([
            "Application ID", "Application Last Manual Update",
            "Latest Provenance Record", "ISSNs", "Title"
        ])

        counter = 0
        for result in esprit.tasks.scroll(conn, "suggestion", keepalive="45m"):
            counter += 1
            application = Suggestion(**result)
            print counter, application.id

            # Part 1 - later provenance records exist
            latest_prov = Provenance.get_latest_by_resource_id(application.id)
            if latest_prov is not None:
                lustamp = adjust_timestamp(application.last_updated_timestamp,
                                           APP_TIMEZONE_CUTOFF)
                created = latest_prov.created_date
                pstamp = latest_prov.created_timestamp
                td = pstamp - lustamp
                diff = td.total_seconds()

                if diff > THRESHOLD:
                    out_later.writerow([
                        application.id, application.last_updated, created, diff
                    ])

            # Part 2 - missing journals
            if application.application_status == constants.APPLICATION_STATUS_ACCEPTED:
                missing = False

                # find the matching journals by issn or by title
                matching_journals = Journal.find_by_issn(
                    application.bibjson().issns())
                if len(matching_journals) == 0:
                    # Have another go, find by title
                    matching_journals = Journal.find_by_title(
                        application.bibjson().title)

                # if there are no matching journals, it is missing.
                if len(matching_journals) == 0:
                    missing = True
                else:
                    # if there are matching journals, find out if any of them are in the doaj.  If none, then journal is still missing
                    those_in_doaj = len(
                        [j for j in matching_journals if j.is_in_doaj()])
                    if those_in_doaj == 0:
                        missing = True

                # if the journal is missing, record it
                if missing:
                    created = ""
                    if latest_prov is not None:
                        created = latest_prov.created_date
                    out_missing.writerow([
                        application.id, application.last_manual_update,
                        created, " ".join(application.bibjson().issns()),
                        application.bibjson().title
                    ])

        print "processed", counter, "suggestions"
示例#4
0
        "application", "app_created", "app_last_update",
        "app_last_manual_update", "app_adjusted_lmu", "app_issns",
        "journal_matches", "journal", "journal_created", "journal_reapp",
        "journal_issns", "jc_ac_diff", "jc_lmua_diff", "mc1", "lra_ac_diff",
        "mc2", "is_match", "reason"
    ])

    # first, get each application and consider it
    counter = 0
    for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"):
        counter += 1
        application = Suggestion(**result)
        application.remove_related_journal()

        # find all the journals that this application could be associated with (which we need to do by issn)
        issns = application.bibjson().issns()

        # query by each issn individually, because we're looking for the widest possible map.  Querying by
        # both would require both issns match
        related_journals = []
        related_journal_ids = []
        for issn in issns:
            journals = Journal.find_by_issn(issn)
            for journal in journals:
                if journal.id not in related_journal_ids:
                    related_journal_ids.append(journal.id)
                    related_journals.append(journal)

        if len(related_journals) > 0:
            # sort the journals by their created date
            related_journals = sorted(related_journals,
示例#5
0
    writer.writerow([
        "application", "app_created", "app_last_update", "app_last_manual_update", "app_adjusted_lmu", "app_issns",
        "journal_matches", "journal", "journal_created", "journal_reapp", "journal_issns",
        "jc_ac_diff", "jc_lmua_diff", "mc1", "lra_ac_diff", "mc2",
        "is_match", "reason"
    ])

    # first, get each application and consider it
    counter = 0
    for result in esprit.tasks.scroll(conn, "suggestion", keepalive="1m"):
        counter += 1
        application = Suggestion(**result)
        application.remove_related_journal()

        # find all the journals that this application could be associated with (which we need to do by issn)
        issns = application.bibjson().issns()

        # query by each issn individually, because we're looking for the widest possible map.  Querying by
        # both would require both issns match
        related_journals = []
        related_journal_ids = []
        for issn in issns:
            journals = Journal.find_by_issn(issn)
            for journal in journals:
                if journal.id not in related_journal_ids:
                    related_journal_ids.append(journal.id)
                    related_journals.append(journal)

        if len(related_journals) > 0:
            # sort the journals by their created date
            related_journals = sorted(related_journals, key=lambda j: j.created_timestamp)
示例#6
0
    def test_02_application_2_journal(self, name, application_type, manual_update_arg, app_key_properties, current_journal, raises):
        # Parameterised check of the application service's application_2_journal:
        # builds an application (optionally tied to an existing or a non-existent
        # current journal), converts it, and verifies which properties the
        # resulting journal took from the application and which from the journal.

        # build the fixtures
        #########################################

        existing_journal = None
        seal = bool(randint(0, 1))
        application = None
        if application_type == "present":
            app_source = ApplicationFixtureFactory.make_application_source()
            application = Suggestion(**app_source)
            application.set_id(application.makeid())

            # strip everything the conversion might carry over, so each case starts clean
            application.remove_contacts()
            application.remove_editor_group()
            application.remove_editor()
            application.remove_owner()
            application.remove_current_journal()
            application.remove_notes()

            if app_key_properties == "yes":
                application.add_contact("Application", "*****@*****.**")
                application.set_editor_group("appeditorgroup")
                application.set_editor("appeditor")
                application.set_owner("appowner")

            application.set_seal(seal)
            application.add_note("Application Note")

            if current_journal == "present":
                existing_journal = Journal(**JournalFixtureFactory.make_journal_source())
                existing_journal.remove_contacts()
                existing_journal.add_contact("Journal", "*****@*****.**")
                existing_journal.set_editor_group("journaleditorgroup")
                existing_journal.set_editor("journaleditor")
                existing_journal.set_owner("journalowner")
                existing_journal.remove_current_application()
                existing_journal.remove_notes()
                existing_journal.add_note("Journal Note")
                existing_journal.save(blocking=True)
                application.set_current_journal(existing_journal.id)
            elif current_journal == "missing":
                # an id that does not correspond to any saved journal
                application.set_current_journal("123456789987654321")

        # any value other than "true"/"false" leaves manual_update as None
        manual_update = True if manual_update_arg == "true" else (False if manual_update_arg == "false" else None)

        # run the conversion
        ########################################

        service = DOAJ.applicationService()
        if raises is not None and raises != "":
            with self.assertRaises(EXCEPTIONS[raises]):
                service.application_2_journal(application, manual_update)
            # nothing further to verify for the error cases
            return

        journal = service.application_2_journal(application, manual_update)

        # verify the outcome
        ######################################

        assert journal is not None
        assert isinstance(journal, Journal)
        assert journal.is_in_doaj() is True

        # the journal bibjson should equal the application's, apart from the "active" key
        journal_bibjson = journal.bibjson().data
        del journal_bibjson["active"]
        assert journal_bibjson == application.bibjson().data

        expected_related = 3 if current_journal == "present" else 1
        assert len(journal.related_applications) == expected_related
        assert journal.related_application_record(application.id) is not None

        if manual_update_arg == "true":
            assert journal.last_manual_update is not None
            assert journal.last_manual_update != "1970-01-01T00:00:00Z"

        if app_key_properties == "yes":
            # the application's key properties win
            people = journal.contacts()
            assert len(people) == 1
            assert people[0].get("name") == "Application"
            assert people[0].get("email") == "*****@*****.**"
            assert journal.editor_group == "appeditorgroup"
            assert journal.editor == "appeditor"
            assert journal.owner == "appowner"
            assert journal.has_seal() == seal

            expected_notes = 2 if current_journal == "present" else 1
            assert len(journal.notes) == expected_notes

        elif app_key_properties == "no":
            if current_journal == "present":
                # the existing journal's key properties are retained
                people = journal.contacts()
                assert len(people) == 1
                assert people[0].get("name") == "Journal"
                assert people[0].get("email") == "*****@*****.**"
                assert journal.editor_group == "journaleditorgroup"
                assert journal.editor == "journaleditor"
                assert journal.owner == "journalowner"
                assert journal.has_seal() == seal
                assert len(journal.notes) == 2

            elif current_journal in ("none", "missing"):
                # neither side supplied key properties
                people = journal.contacts()
                assert len(people) == 0
                assert journal.editor_group is None
                assert journal.editor is None
                assert journal.owner is None
                assert journal.has_seal() == seal
                assert len(journal.notes) == 1

        if current_journal == "present":
            # the conversion must reuse the existing journal record
            assert existing_journal.id == journal.id
            assert existing_journal.created_date == journal.created_date
    def test_02_application_2_journal(self, name, application_type,
                                      manual_update_arg, app_key_properties,
                                      current_journal, raises):
        # Exercises application_2_journal on the application service across the
        # parameter matrix: application present or not, key properties set on the
        # application or not, and a current journal that is present, missing or
        # absent.  Error cases are selected by a non-empty `raises` key.

        # arrange
        #########################################

        prior_journal = None
        sealed = bool(randint(0, 1))
        application = None
        if application_type == "present":
            application = Suggestion(
                **ApplicationFixtureFactory.make_application_source())
            application.set_id(application.makeid())

            # clear every property that the conversion could copy across
            application.remove_contacts()
            application.remove_editor_group()
            application.remove_editor()
            application.remove_owner()
            application.remove_current_journal()
            application.remove_notes()

            if app_key_properties == "yes":
                application.add_contact("Application", "*****@*****.**")
                application.set_editor_group("appeditorgroup")
                application.set_editor("appeditor")
                application.set_owner("appowner")

            application.set_seal(sealed)
            application.add_note("Application Note")

            if current_journal == "present":
                prior_journal = Journal(
                    **JournalFixtureFactory.make_journal_source())
                prior_journal.remove_contacts()
                prior_journal.add_contact("Journal", "*****@*****.**")
                prior_journal.set_editor_group("journaleditorgroup")
                prior_journal.set_editor("journaleditor")
                prior_journal.set_owner("journalowner")
                prior_journal.remove_current_application()
                prior_journal.remove_notes()
                prior_journal.add_note("Journal Note")
                prior_journal.save(blocking=True)
                application.set_current_journal(prior_journal.id)
            elif current_journal == "missing":
                # point at an id for which no journal has been saved
                application.set_current_journal("123456789987654321")

        # translate the string argument into True / False / None
        if manual_update_arg == "true":
            manual_update = True
        elif manual_update_arg == "false":
            manual_update = False
        else:
            manual_update = None

        # act
        ########################################

        svc = DOAJ.applicationService()
        expects_error = raises is not None and raises != ""
        if expects_error:
            with self.assertRaises(EXCEPTIONS[raises]):
                svc.application_2_journal(application, manual_update)
            # error cases have no result to inspect
            return

        journal = svc.application_2_journal(application, manual_update)

        # assert
        ######################################

        assert journal is not None
        assert isinstance(journal, Journal)
        assert journal.is_in_doaj() is True

        # compare bibjson once the journal-only "active" key is dropped
        converted = journal.bibjson().data
        del converted["active"]
        assert converted == application.bibjson().data

        related_count = len(journal.related_applications)
        if current_journal == "present":
            assert related_count == 3
        else:
            assert related_count == 1
        assert journal.related_application_record(application.id) is not None

        if manual_update_arg == "true":
            assert journal.last_manual_update is not None
            assert journal.last_manual_update != "1970-01-01T00:00:00Z"

        if app_key_properties == "yes":
            # key properties should come from the application
            contact_list = journal.contacts()
            assert len(contact_list) == 1
            assert contact_list[0].get("name") == "Application"
            assert contact_list[0].get("email") == "*****@*****.**"
            assert journal.editor_group == "appeditorgroup"
            assert journal.editor == "appeditor"
            assert journal.owner == "appowner"
            assert journal.has_seal() == sealed

            note_count = len(journal.notes)
            if current_journal == "present":
                assert note_count == 2
            else:
                assert note_count == 1

        elif app_key_properties == "no":
            if current_journal == "present":
                # key properties should be kept from the existing journal
                contact_list = journal.contacts()
                assert len(contact_list) == 1
                assert contact_list[0].get("name") == "Journal"
                assert contact_list[0].get("email") == "*****@*****.**"
                assert journal.editor_group == "journaleditorgroup"
                assert journal.editor == "journaleditor"
                assert journal.owner == "journalowner"
                assert journal.has_seal() == sealed
                assert len(journal.notes) == 2

            elif current_journal in ("none", "missing"):
                # no source for key properties, so they should all be empty
                contact_list = journal.contacts()
                assert len(contact_list) == 0
                assert journal.editor_group is None
                assert journal.editor is None
                assert journal.owner is None
                assert journal.has_seal() == sealed
                assert len(journal.notes) == 1

        if current_journal == "present":
            # the returned journal must be the same record as the pre-existing one
            assert prior_journal.id == journal.id
            assert prior_journal.created_date == journal.created_date