def journals_applications_provenance(outfile_applications, outfile_accounts,
                                     outfile_reapps, conn):
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(
            outfile_accounts, "wb",
            "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:
        out_applications = csv.writer(f)
        out_applications.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied",
            "Application ID", "Application Last Updated", "Application Status",
            "Published Diff", "Latest Edit Recorded",
            "Latest Accepted Recorded"
        ])

        out_accounts = csv.writer(g)
        out_accounts.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ",
            "Missing Account ID"
        ])

        out_reapps = csv.writer(h)
        out_reapps.writerow([
            "Journal ID", "Journal Created", "Journal Reapplied",
            "Application ID", "Application Created",
            "Application Last Updated", "Application Last Manual Update",
            "Application Status", "Published Diff"
        ])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            applications = Suggestion.find_by_issn(issns)
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp,
                                           APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp # no need to adjust this one
            app_man_lustamp = adjust_timestamp(
                latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"][
                    "resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"][
                    "action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"][
                    "resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"][
                    "action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request, latest.id,
                    latest.last_updated, latest.application_status, diff,
                    last_edit, last_accept
                ])

            # was the journal (in doaj) created before the application by greater than the threshold, and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj(
            ):
                out_reapps.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request, latest.id,
                    latest.created_date, latest.last_updated,
                    latest.last_manual_update, latest.application_status, mdiff
                ])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([
                    journal.id, journal.created_date,
                    journal.last_update_request,
                    str(journal.is_in_doaj()), "NO OWNER"
                ])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    out_accounts.writerow([
                        journal.id, journal.created_date,
                        journal.last_update_request,
                        str(journal.is_in_doaj()), owner
                    ])

        print "processed", counter, "journals"
def journals_applications_provenance(outfile_applications, outfile_accounts, outfile_reapps, conn):
    with codecs.open(outfile_applications, "wb", "utf-8") as f, codecs.open(outfile_accounts, "wb", "utf-8") as g, codecs.open(outfile_reapps, "wb", "utf-8") as h:
        out_applications = csv.writer(f)
        out_applications.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Last Updated", "Application Status", "Published Diff", "Latest Edit Recorded", "Latest Accepted Recorded"])

        out_accounts = csv.writer(g)
        out_accounts.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "In DOAJ", "Missing Account ID"])

        out_reapps = csv.writer(h)
        out_reapps.writerow(["Journal ID", "Journal Created", "Journal Reapplied", "Application ID", "Application Created", "Application Last Updated", "Application Last Manual Update", "Application Status", "Published Diff"])

        counter = 0
        for result in esprit.tasks.scroll(conn, "journal", keepalive="45m"):
            counter += 1
            journal = Journal(**result)
            print counter, journal.id

            # first figure out if there is a broken related application
            issns = journal.bibjson().issns()
            applications = Suggestion.find_by_issn(issns)
            latest = None
            for application in applications:
                if latest is None:
                    latest = application
                if application.last_updated_timestamp > latest.last_updated_timestamp:
                    latest = application

            if latest is None:
                continue

            jcreated = journal.created_timestamp
            reapp = journal.last_update_request
            print counter, journal.id, reapp
            if reapp is not None:
                jcreated = datetime.strptime(reapp, "%Y-%m-%dT%H:%M:%SZ")
            jcreated = adjust_timestamp(jcreated, JOURNAL_TIMEZONE_CUTOFF)

            app_lustamp = adjust_timestamp(latest.last_updated_timestamp, APP_TIMEZONE_CUTOFF)
            # app_man_lustamp = latest.last_manual_update_timestamp # no need to adjust this one
            app_man_lustamp = adjust_timestamp(latest.last_manual_update_timestamp, APP_TIMEZONE_CUTOFF)
            td = jcreated - app_lustamp
            mtd = jcreated - app_man_lustamp
            diff = td.total_seconds()
            mdiff = mtd.total_seconds()

            # was the journal created after the application by greater than the threshold?
            if diff > THRESHOLD:
                last_edit = ""
                last_accept = ""

                edit_query = deepcopy(PROV_QUERY)
                edit_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                edit_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "edit"
                provs = Provenance.q2obj(q=edit_query)
                if len(provs) > 0:
                    last_edit = provs[0].last_updated

                accept_query = deepcopy(PROV_QUERY)
                accept_query["query"]["bool"]["must"][0]["term"]["resource_id.exact"] = latest.id
                accept_query["query"]["bool"]["must"][1]["term"]["action.exact"] = "status:accepted"
                provs = Provenance.q2obj(q=accept_query)
                if len(provs) > 0:
                    last_accept = provs[0].last_updated

                out_applications.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.last_updated, latest.application_status, diff, last_edit, last_accept])

            # was the journal (in doaj) created before the application by greater than the threshold, and is it in a state other than rejected
            if mdiff < -1 * THRESHOLD and latest.application_status != constants.APPLICATION_STATUS_REJECTED and journal.is_in_doaj():
                out_reapps.writerow([journal.id, journal.created_date, journal.last_update_request, latest.id, latest.created_date, latest.last_updated, latest.last_manual_update, latest.application_status, mdiff])

            # now figure out if the account is missing
            owner = journal.owner
            if owner is None:
                out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), "NO OWNER"])
            else:
                acc = Account.pull(owner)
                if acc is None:
                    out_accounts.writerow([journal.id, journal.created_date, journal.last_update_request, str(journal.is_in_doaj()), owner])

        print "processed", counter, "journals"