Пример #1
0
    def make_status_spread(cls, desired_output, period, role_map):
        """Build Provenance records that reproduce a table of status-change counts.

        ``desired_output`` is a list of rows.  The first row is a header: its
        first cell is discarded and every remaining cell is a period label,
        parsed with ``"%Y-%m"`` when ``period`` is ``"month"`` and ``"%Y"``
        when it is ``"year"``.  Every other row is a user id followed by one
        count per label.  The argument is deep-copied, so the caller's table
        is left untouched.

        :param desired_output: table of counts laid out as described above
        :param period: ``"month"`` or ``"year"``; for any other value each
            date range is ``(None, None)`` and is handed to
            ``dates.random_date`` as-is
        :param role_map: mapping of user id to role name
        :return: list of Provenance objects, ``count`` of them per table cell
        :raises KeyError: if a user in the table is missing from ``role_map``
        :raises ValueError: if records are requested for a role that has no
            status mapping
        """
        table = deepcopy(desired_output)
        header = table.pop(0)
        header.pop(0)  # the first header cell sits above the user column

        ranges = []
        for label in header:
            start = None
            end = None
            if period == "month":
                startts = dates.parse(label, "%Y-%m")
                # the range ends on the first day of the following month
                if startts.month == 12:
                    endts = datetime(startts.year + 1, 1, 1)
                else:
                    endts = datetime(startts.year, startts.month + 1, 1)
                start = dates.format(startts)
                end = dates.format(endts)
            elif period == "year":
                startts = dates.parse(label, "%Y")
                endts = datetime(startts.year + 1, 1, 1)
                start = dates.format(startts)
                end = dates.format(endts)

            ranges.append((start, end))

        provs = []
        for row in table:
            user = row.pop(0)
            role = role_map[user]
            for i, count in enumerate(row):
                start, end = ranges[i]
                if role == "associate_editor":
                    status = constants.APPLICATION_STATUS_COMPLETED
                elif role == "editor":
                    status = constants.APPLICATION_STATUS_READY
                elif role == "admin":
                    # admins get one randomly chosen status per table cell
                    status = ADMIN_STATUSES[randint(0, len(ADMIN_STATUSES) - 1)]
                else:
                    status = None

                if status is None and count > 0:
                    # fail with a clear message instead of the TypeError that
                    # "status:" + None would produce further down
                    raise ValueError(
                        "No status defined for role '{x}' (user '{y}')".format(x=role, y=user))

                for _ in range(count):
                    p = Provenance()
                    p.set_created(dates.random_date(start, end))
                    p.user = user
                    p.roles = [role]
                    p.type = "suggestion"
                    p.action = "status:" + status
                    p.resource_id = uuid.uuid4().hex
                    provs.append(p)

        return provs
Пример #2
0
    def make_status_spread(cls, desired_output, period, role_map):
        """Generate Provenance objects whose distribution matches a count table.

        The table's first row is a header; its first cell is dropped and the
        rest are period labels (``"%Y-%m"`` for ``period == "month"``,
        ``"%Y"`` for ``period == "year"``).  Each later row starts with a user
        id and continues with one count per period label.  A deep copy of the
        table is used, so the caller's data is not modified.

        :param desired_output: list of rows as described above
        :param period: ``"month"`` or ``"year"``; any other value produces
            ``(None, None)`` date ranges, which are passed unchanged to
            ``dates.random_date``
        :param role_map: mapping of user id to role name
        :return: list of Provenance objects, one per unit of each count
        :raises KeyError: if a user id is not present in ``role_map``
        :raises ValueError: if a positive count is given for a role that has
            no status mapping
        """

        def _bounds(label):
            # formatted (start, end) of the period named by ``label``;
            # (None, None) when ``period`` is neither "month" nor "year"
            if period == "month":
                startts = dates.parse(label, "%Y-%m")
                # roll December over into January of the next year
                rollover = startts.month == 12
                endts = datetime(startts.year + (1 if rollover else 0),
                                 1 if rollover else startts.month + 1,
                                 1)
                return dates.format(startts), dates.format(endts)
            if period == "year":
                startts = dates.parse(label, "%Y")
                endts = datetime(startts.year + 1, 1, 1)
                return dates.format(startts), dates.format(endts)
            return None, None

        rows = deepcopy(desired_output)
        header = rows.pop(0)
        header.pop(0)  # drop the cell above the user column
        ranges = [_bounds(label) for label in header]

        provs = []
        for row in rows:
            user = row.pop(0)
            role = role_map[user]
            for i, count in enumerate(row):
                start, end = ranges[i]

                status = None
                if role == "associate_editor":
                    status = constants.APPLICATION_STATUS_COMPLETED
                elif role == "editor":
                    status = constants.APPLICATION_STATUS_READY
                elif role == "admin":
                    # a fresh random admin status is drawn for every cell
                    status = ADMIN_STATUSES[randint(0, len(ADMIN_STATUSES) - 1)]

                if status is None and count > 0:
                    # an unmapped role used to crash with a TypeError when the
                    # action string was concatenated; report it explicitly
                    raise ValueError(
                        "No status defined for role '{x}' (user '{y}')".format(x=role, y=user))

                for _ in range(count):
                    p = Provenance()
                    p.set_created(dates.random_date(start, end))
                    p.user = user
                    p.roles = [role]
                    p.type = "suggestion"
                    p.action = "status:" + status
                    p.resource_id = uuid.uuid4().hex
                    provs.append(p)

        return provs
Пример #3
0
def content_reports(fr, to, outdir):
    """Write a CSV of application counts per year and country.

    Runs the ``ContentByDate(fr, to)`` query against ``models.Suggestion``,
    collects the ``doc_count`` of every country bucket under every year
    bucket, tabulates the result and writes it to a single file in ``outdir``.

    :param fr: start of the reporting window, passed to ``ContentByDate``
    :param to: end of the reporting window, passed to ``ContentByDate``
    :param outdir: directory the CSV file is written to
    :return: list containing the path of the file that was written
    """
    query = ContentByDate(fr, to)
    response = models.Suggestion.query(q=query.query())

    # report[year][country name] = {"count": n}
    report = {}
    for year_bucket in response.get("aggregations", {}).get("years", {}).get("buckets", []):
        year = dates.parse(year_bucket.get("key_as_string")).year
        per_country = report.setdefault(year, {})
        for country_bucket in year_bucket.get("countries", {}).get("buckets", []):
            name = datasets.get_country_name(country_bucket.get("key"))
            per_country.setdefault(name, {})["count"] = country_bucket.get("doc_count")

    table = _tabulate_time_entity_group(report, "Country")

    window = _fft(fr) + "_to_" + _fft(to)
    filename = "applications_by_year_by_country__" + window + "__on_" + dates.today() + ".csv"
    outfile = os.path.join(outdir, filename)

    with codecs.open(outfile, "wb", "utf-8") as handle:
        writer = UnicodeWriter(handle)
        for line in table:
            writer.writerow(line)

    return [outfile]
Пример #4
0
def content_reports(fr, to, outdir):
    """Produce the applications-by-year-by-country CSV report.

    The ``ContentByDate(fr, to)`` query is executed through
    ``models.Suggestion.query``; the year buckets and their nested country
    buckets from the response are folded into a nested dict, tabulated with
    ``_tabulate_time_entity_group`` and written out as one CSV in ``outdir``.

    :param fr: lower bound handed to ``ContentByDate``
    :param to: upper bound handed to ``ContentByDate``
    :param outdir: directory in which the report file is created
    :return: one-element list holding the path of the written file
    """
    res = models.Suggestion.query(q=ContentByDate(fr, to).query())
    aggregations = res.get("aggregations", {})
    year_buckets = aggregations.get("years", {}).get("buckets", [])

    report = {}
    for bucket in year_buckets:
        year = dates.parse(bucket.get("key_as_string")).year
        if year not in report:
            report[year] = {}
        countries = bucket.get("countries", {}).get("buckets", [])
        for entry in countries:
            country_name = datasets.get_country_name(entry.get("key"))
            if country_name not in report[year]:
                report[year][country_name] = {}
            report[year][country_name]["count"] = entry.get("doc_count")

    table = _tabulate_time_entity_group(report, "Country")

    # file name records both the reporting window and the day it was generated
    prefix = "applications_by_year_by_country__" + _fft(fr) + "_to_" + _fft(to)
    filename = prefix + "__on_" + dates.today() + ".csv"
    outfile = os.path.join(outdir, filename)
    outfiles = [outfile]

    with codecs.open(outfile, "wb", "utf-8") as out:
        writer = UnicodeWriter(out)
        for record in table:
            writer.writerow(record)

    return outfiles
Пример #5
0
 def date(self, val):
     """Store the year of ``val`` in ``self._date``.

     A falsy ``val`` is stored unchanged.  Otherwise ``val`` is parsed with
     ``dates.parse`` and only the year is kept; if parsing raises
     ``ValueError`` then ``None`` is stored instead.
     """
     year = val
     if val:
         try:
             year = dates.parse(val).year
         except ValueError:
             year = None
     self._date = year
Пример #6
0
    def make_action_spread(cls, desired_output, action, period):
        """Create Provenance records for one action, spread as a count table dictates.

        The first row of ``desired_output`` is a header whose first cell is
        dropped; the remaining cells are period labels (``"%Y-%m"`` for
        ``period == "month"``, ``"%Y"`` for ``period == "year"``).  Each later
        row is a user id followed by one count per label.  Works on a deep
        copy, so the caller's table is not changed.

        :param desired_output: table of counts as described above
        :param action: value assigned to every record's ``action``
        :param period: ``"month"`` or ``"year"``; other values give
            ``(None, None)`` date ranges
        :return: list of Provenance objects
        """
        table = deepcopy(desired_output)
        header = table.pop(0)
        header.pop(0)

        ranges = []
        for label in header:
            start, end = None, None
            if period == "month":
                startts = dates.parse(label, "%Y-%m")
                # end of range is the first day of the next month
                if startts.month == 12:
                    endts = datetime(startts.year + 1, 1, 1)
                else:
                    endts = datetime(startts.year, startts.month + 1, 1)
                start, end = dates.format(startts), dates.format(endts)
            elif period == "year":
                startts = dates.parse(label, "%Y")
                endts = datetime(startts.year + 1, 1, 1)
                start, end = dates.format(startts), dates.format(endts)
            ranges.append((start, end))

        provs = []
        for row in table:
            user = row.pop(0)
            for idx, count in enumerate(row):
                start, end = ranges[idx]
                for _ in range(count):
                    prov = Provenance()
                    prov.set_created(dates.random_date(start, end))
                    prov.user = user
                    prov.type = "suggestion"
                    prov.action = action
                    prov.resource_id = uuid.uuid4().hex
                    provs.append(prov)

        return provs
Пример #7
0
    def make_action_spread(cls, desired_output, action, period):
        """Generate Provenance objects for ``action`` following a count table.

        ``desired_output`` holds a header row (first cell discarded, the rest
        are period labels) followed by rows of a user id plus one count per
        label.  Labels are parsed as ``"%Y-%m"`` when ``period`` is
        ``"month"`` and as ``"%Y"`` when it is ``"year"``.  The table is
        deep-copied before use.

        :param desired_output: list of rows as described above
        :param action: stored on each generated record as its ``action``
        :param period: ``"month"`` or ``"year"``; anything else leaves every
            date range as ``(None, None)``
        :return: list of Provenance objects, ``count`` per table cell
        """

        def _bounds(label):
            # formatted (start, end) for one header label
            if period == "month":
                startts = dates.parse(label, "%Y-%m")
                rollover = startts.month == 12  # December ends in next January
                endts = datetime(startts.year + (1 if rollover else 0),
                                 1 if rollover else startts.month + 1,
                                 1)
            elif period == "year":
                startts = dates.parse(label, "%Y")
                endts = datetime(startts.year + 1, 1, 1)
            else:
                return None, None
            return dates.format(startts), dates.format(endts)

        rows = deepcopy(desired_output)
        header = rows.pop(0)
        header.pop(0)
        ranges = [_bounds(label) for label in header]

        provs = []
        for row in rows:
            user = row.pop(0)
            for col, count in enumerate(row):
                start, end = ranges[col]
                for _ in range(count):
                    record = Provenance()
                    record.set_created(dates.random_date(start, end))
                    record.user = user
                    record.type = "suggestion"
                    record.action = action
                    record.resource_id = uuid.uuid4().hex
                    provs.append(record)

        return provs
Пример #8
0
    def iterate(self, issn, since, to=None):
        """Yield ``(article, fr)`` pairs for an ISSN, one day range at a time.

        The span from ``since`` to ``to`` (``dates.now()`` when ``to`` is
        None) is split with ``dates.day_ranges``.  EPMC is queried one day at
        a time so that results come back in approximately updated-date order;
        inside a single day no ordering is guaranteed, and a finer granularity
        would greatly increase the number of requests.

        :param issn: ISSN passed to ``queries.oa_issn_updated``
        :param since: start of the span, parsed with ``dates.parse``
        :param to: end of the span, parsed with ``dates.parse``
        :return: generator of ``(self.crosswalk(record), fr)`` tuples, where
            ``fr`` is the start of the day range the record was found in
        """
        upper = dates.now() if to is None else to

        day_windows = dates.day_ranges(dates.parse(since), dates.parse(upper))
        throttle = app.config.get("EPMC_HARVESTER_THROTTLE")

        previous = None
        for fr, _until in day_windows:
            # space out the per-day requests once there is a previous request
            # to measure from and a throttle is configured
            if not (previous is None or throttle is None):
                elapsed = (datetime.utcnow() - previous).total_seconds()
                app.logger.debug(
                    "Last day request at {x}, {y}s ago; throttle {z}s".format(
                        x=previous, y=elapsed, z=throttle))
                if elapsed < throttle:
                    pause = throttle - elapsed
                    app.logger.debug(
                        "Throttling EPMC requests for {x}s".format(x=pause))
                    time.sleep(pause)

            # Only the start of the day range is used: EPMC's granularity
            # means querying with the full range would double count.
            # date_sort=True is a weak proxy for ordering by updated date; it
            # actually orders by publication date, which may partly coincide.
            query = queries.oa_issn_updated(issn, fr, date_sort=True)

            # the same throttle is applied to the paging requests
            results = client.EuropePMC.complex_search_iterator(
                query, throttle=throttle)
            for record in results:
                yield self.crosswalk(record), fr

            previous = datetime.utcnow()
Пример #9
0
    def make_application_spread(cls, desired_output, period):
        """Create Suggestion objects distributed as a per-country count table dictates.

        The first row of ``desired_output`` is a header: its first cell is
        dropped and the rest are period labels (``"%Y-%m"`` for
        ``period == "month"``, ``"%Y"`` for ``period == "year"``).  Each later
        row is a country followed by one count per label.  A deep copy of the
        table is consumed, leaving the caller's data intact.

        :param desired_output: table of counts as described above
        :param period: ``"month"`` or ``"year"``; other values give
            ``(None, None)`` date ranges
        :return: list of Suggestion objects with a created date and country set
        """
        table = deepcopy(desired_output)
        header = table.pop(0)
        header.pop(0)

        ranges = []
        for label in header:
            bounds = (None, None)
            if period == "month":
                startts = dates.parse(label, "%Y-%m")
                # the range closes on the first day of the following month
                if startts.month == 12:
                    endts = datetime(startts.year + 1, 1, 1)
                else:
                    endts = datetime(startts.year, startts.month + 1, 1)
                bounds = (dates.format(startts), dates.format(endts))
            elif period == "year":
                startts = dates.parse(label, "%Y")
                endts = datetime(startts.year + 1, 1, 1)
                bounds = (dates.format(startts), dates.format(endts))
            ranges.append(bounds)

        apps = []
        for row in table:
            country = row.pop(0)
            for idx, count in enumerate(row):
                start, end = ranges[idx]
                for _ in range(count):
                    suggestion = Suggestion()
                    suggestion.set_created(dates.random_date(start, end))
                    suggestion.bibjson().country = country
                    apps.append(suggestion)

        return apps
Пример #10
0
    def test_04_timeout(self):
        """A lock requested with a 2400s timeout must expire more than 2300s from now."""
        journal = models.Journal(**JournalFixtureFactory.make_journal_source())
        journal.save()

        time.sleep(2)

        # the expiry must fall later than this threshold
        threshold = datetime.utcnow() + timedelta(seconds=2300)

        # take the lock with a longer timeout
        held = lock.lock("journal", journal.id, "testuser", 2400)

        assert dates.parse(held.expires) > threshold
Пример #11
0
    def test_04_timeout(self):
        """Check that a lock taken with a 2400s timeout expires later than now + 2300s."""
        fixture = JournalFixtureFactory.make_journal_source()
        saved = models.Journal(**fixture)
        saved.save()

        time.sleep(2)

        earliest_expiry = datetime.utcnow() + timedelta(seconds=2300)

        # request the lock with a longer timeout than the threshold above
        journal_lock = lock.lock("journal", saved.id, "testuser", 2400)
        expires = dates.parse(journal_lock.expires)

        assert expires > earliest_expiry
Пример #12
0
 def stampify(val):
     """Parse ``val`` with ``dates.parse`` using ``in_format``, which is defined outside this function."""
     parsed = dates.parse(val, format=in_format)
     return parsed