def earliestTimestampForMetric(metric):
    """Return the smallest first-non-zero timestamp of *metric* over all documents.

    Loads the documents via
    ``DocumentTimelines.fromFile("data/document_timelines.json")`` on every
    call. For each document, the timeline stored in the attribute named
    *metric* is scanned in order for the first point whose second element
    is non-zero. The smallest first element among those points is returned.

    NOTE(review): points are presumably ``(timestamp, value)`` pairs and
    timelines presumably ordered by time -- confirm against
    ``DocumentTimelines``.

    Args:
        metric: Name of the timeline attribute to read from each document,
            e.g. ``"scopusTimeline"``.

    Returns:
        The first element (``point[0]``) of the selected point.

    Raises:
        ValueError: If no document has a point with a non-zero second
            element (``min`` of an empty sequence).
        AttributeError: If a document has no attribute named *metric*.
    """
    docs = DocumentTimelines.fromFile("data/document_timelines.json")

    # First point with a non-zero value per document; None marks documents
    # whose timeline never leaves zero (or is empty).
    firstNonZeroPoints = (
        next((point for point in getattr(doc, metric) if point[1] != 0), None)
        for doc in docs
    )

    # Taking the minimum of the timestamps directly is equivalent to
    # min(points, key=lambda p: p[0])[0], without the extra indexing step.
    return min(point[0] for point in firstNonZeroPoints if point is not None)
f = open("foo", "w") for doc in docsTimelines: d2 = doc.trimmed() d2.publicationTimestamp = timestr2timestamp(d2.publicationTimestamp) f.write(d2.toJson(trimmed=True) + "\n") f.close()""" maxTimestamp = SimpleDoc.maximumTimestampInDataset print maxTimestamp numObservedDays = 500 docsTimelines = DocumentTimelines.fromFile("data/document_timelines_trimmed.json", trimmed=True, sort=True) attributes = [("scopusTimeline", (3, 9)), ("crossrefTimeline", (3, 9)), ("pubmedTimeline", (3, 9))] """[("citeULikeTimeline", (3,9)), ("pubmedTimeline", (3,9)), ("scopusTimeline", (3, 9)), ("pmcTimeline", (6, 11)), ("facebookTimeline", (12, 11)), ("mendeleyTimeline", (1, 12)), ("crossrefTimeline", (3, 9)), ("counterTimeline", (9, 9)), ("facebookTimeline", (12, 11)), ("mendeleyTimeline", (1, 12)), ("citeULikeTimeline", (3,9))]""" for toleranceDays in [2]: toleranceSeconds = toleranceDays * 60 * 60 * 24 ways = {} ys = [] for attribute, startDate in attributes: lowerBound = calendar.timegm(datetime.date(2000 + startDate[1], startDate[0], 1).timetuple()) upperBound = maxTimestamp - (300 * 60 * 60 * 24)