def async_job(user, limit=None, after=None, before=None):
    """Build the activity index for `user` from scratch, reporting on `Q`.

    Every activity returned by ``user.client().get_activities()`` that has a
    summary polyline is collected for the index.  While collecting, each such
    activity is either streamed to `Q` (when it is wanted by the caller) or
    replaced by a progress message.  When the fetch finishes without error
    the collected activities are turned into a DataFrame indexed by
    "beginTimestamp" (newest first) and stored in the cache.

    `strava2dict`, `dtypes` and `Q` are not defined in this function; they
    are expected to come from the enclosing scope.

    Args:
        user: object providing `indexing()`, `client()` and `index_key()`.
        limit: if truthy, stream this many activities regardless of date,
            then put a {"stop_rendering": "1"} marker on `Q`.
        after: if truthy, inclusive lower bound on "beginTimestamp" for
            streamed activities.
        before: if truthy, inclusive upper bound on "beginTimestamp" for
            streamed activities.
    """
    # Flag that an indexing run is in progress for this user.
    user.indexing(True)

    activities_list = []
    try:
        for a in user.client().get_activities():
            d = strava2dict(a)
            if not d.get("summary_polyline"):
                # Activities without a polyline are not indexed at all.
                continue

            activities_list.append(d)
            ts = d["beginTimestamp"]

            # An activity is streamed while `limit` has not been used up, or
            # when it satisfies EVERY date bound that was supplied.  (Joining
            # the two bounds with "or" would match every activity whenever
            # both are given.)
            if limit or (
                (after or before)
                and (not after or ts >= after)
                and (not before or ts <= before)
            ):
                d2 = dict(d)
                d2["beginTimestamp"] = str(d2["beginTimestamp"])
                Q.put(d2)
                app.logger.info("put {} on queue".format(d2["id"]))

                if limit:
                    limit -= 1
                    if not limit:
                        Q.put({"stop_rendering": "1"})
            else:
                Q.put({"msg": "indexing...{} activities"
                       .format(len(activities_list))})

            # Yield so that other greenlets get a chance to run.
            gevent.sleep(0)

    except Exception as e:
        Q.put({"error": str(e)})

    else:
        Q.put({"msg": "done indexing {} activities."
               .format(len(activities_list))})

        if activities_list:
            activity_index = (pd.DataFrame(activities_list)
                              .set_index("beginTimestamp")
                              .sort_index(ascending=False)
                              .astype(dtypes))

            app.logger.debug("done with indexing for {}".format(user))
            dt_last_indexed = datetime.utcnow()

            # NOTE(review): to_msgpack was removed in pandas 1.0, so this
            # relies on an older pandas being installed.
            packed = activity_index.to_msgpack(compress='blosc')
            cache.set(user.index_key(),
                      (dt_last_indexed, packed),
                      CACHE_INDEX_TIMEOUT)

            app.logger.info("cached {}, size={}".format(user.index_key(),
                                                        len(packed)))
        else:
            # An empty list has no "beginTimestamp" column, so set_index
            # would raise KeyError here, outside the reach of the except
            # clause above.  There is nothing to cache in that case.
            app.logger.info(
                "no activities to index for {}".format(user))

    finally:
        user.indexing(False)
        # Last item on the queue; presumably the consumer treats it as the
        # end-of-stream sentinel -- confirm against the reader of Q.
        Q.put(StopIteration)
def indexing(self, status=None):
    """Get or set the "currently indexing" flag for this user.

    The flag is kept in the cache under a key derived from `strava_id`, so
    it can be seen by other processes sharing that cache.  It is stored with
    a 30 second expiry: an indexing run is not expected to take longer.

    Args:
        status: value to store as the flag, or None to read the current one.

    Returns:
        The cached flag value when `status` is None, otherwise whatever
        `cache.set` returns.
    """
    flag_key = "indexing {}".format(self.strava_id)

    if status is not None:
        return cache.set(flag_key, status, 30)

    return cache.get(flag_key)
def index(self, activity_ids=None, limit=None, after=None, before=None):
    """Return this user's activity index, building it first if necessary.

    If a cached index exists it is refreshed when stale, filtered by the
    arguments, and returned as a list of records.  If no cached index exists
    a background job is started to build it, and the queue that job writes
    to is returned instead.

    Args:
        activity_ids: if truthy, return only the rows whose "id" is in this
            collection (cached-index path only; the other filters are then
            ignored).
        limit: if truthy, the number of most recent activities wanted.
        after: if truthy, inclusive lower bound on "beginTimestamp".
        before: if truthy, inclusive upper bound on "beginTimestamp".

    Returns:
        * a one-element list holding an {"error": ...} dict when another
          indexing run is flagged as in progress, or when refreshing the
          cached index fails;
        * a list of record dicts (with "beginTimestamp" as a string) when a
          cached index exists;
        * otherwise the queue `Q` that the background job fills with
          activity dicts, {"msg": ...}, {"error": ...} and
          {"stop_rendering": "1"} items, ending with `StopIteration`.
    """

    def strava2dict(a):
        # Pick out the fields of a client activity object kept in the index.
        return {
            "id": a.id,
            "name": a.name,
            "type": a.type,
            "summary_polyline": a.map.summary_polyline,
            "beginTimestamp": a.start_date_local,
            "total_distance": float(a.distance),
            "elapsed_time": int(a.elapsed_time.total_seconds()),
            "average_speed": float(a.average_speed)
        }

    # Column dtypes applied to the index before it is serialized.
    dtypes = {
        "id": "uint32",
        "type": "category",
        "total_distance": "float32",
        "elapsed_time": "uint32",
        "average_speed": "float16"
    }

    if self.indexing():
        return [{
            "error": "Indexing activities for user {}...<br>Please try again in a few seconds.<br>"
            .format(self.strava_id)
        }]

    ind = cache.get(self.index_key())
    if ind:
        dt_last_indexed, packed = ind
        # NOTE(review): read_msgpack/to_msgpack were removed in pandas 1.0,
        # so this relies on an older pandas being installed.
        activity_index = pd.read_msgpack(packed).astype({"type": str})
        elapsed = (datetime.utcnow() - dt_last_indexed).total_seconds()

        # update the index if we need to
        if (elapsed > CACHE_INDEX_UPDATE_TIMEOUT) and (not OFFLINE):
            # The index is sorted newest-first, so row 0 is the latest
            # activity already known.
            latest = activity_index.index[0]
            app.logger.info("updating activity index for {}"
                            .format(self.strava_id))

            already_got = set(activity_index.id)

            try:
                activities_list = [
                    strava2dict(a)
                    for a in self.client().get_activities(after=latest)
                    if a.id not in already_got
                ]
            except Exception as e:
                return [{"error": str(e)}]

            if activities_list:
                df = pd.DataFrame(activities_list).set_index(
                    "beginTimestamp")

                activity_index = (
                    df.append(activity_index)
                    .drop_duplicates()
                    .sort_index(ascending=False)
                    .astype(dtypes)
                )

            # Re-cache even when nothing new arrived, so that the
            # "last indexed" time moves forward and the next call does not
            # query the client again straight away.
            dt_last_indexed = datetime.utcnow()
            cache.set(self.index_key(),
                      (dt_last_indexed,
                       activity_index.to_msgpack(compress='blosc')),
                      CACHE_INDEX_TIMEOUT)

        if activity_ids:
            df = activity_index[activity_index["id"].isin(activity_ids)]
        elif limit:
            df = activity_index.head(limit)
        else:
            df = activity_index
            # Label slices on the newest-first timestamp index.
            if after:
                df = df[:after]
            if before:
                df = df[before:]

        df = df.reset_index()
        df.beginTimestamp = df.beginTimestamp.astype(str)
        return df.to_dict("records")

    # If we got here then the index hasn't been created yet
    Q = Queue()
    P = Pool()

    def async_job(user, limit=None, after=None, before=None):
        # Fetch every activity for `user`, stream the wanted ones to Q
        # (progress messages for the rest), then build and cache the index.

        # Flag that an indexing run is in progress for this user.
        user.indexing(True)

        activities_list = []
        try:
            for a in user.client().get_activities():
                d = strava2dict(a)
                if not d.get("summary_polyline"):
                    # Activities without a polyline are not indexed at all.
                    continue

                activities_list.append(d)
                ts = d["beginTimestamp"]

                # An activity is streamed while `limit` has not been used
                # up, or when it satisfies EVERY date bound that was given.
                # (Joining the two bounds with "or" would match every
                # activity whenever both are given, unlike the cached path
                # above, which applies both slices.)
                if limit or (
                    (after or before)
                    and (not after or ts >= after)
                    and (not before or ts <= before)
                ):
                    d2 = dict(d)
                    d2["beginTimestamp"] = str(d2["beginTimestamp"])
                    Q.put(d2)
                    app.logger.info("put {} on queue".format(d2["id"]))

                    if limit:
                        limit -= 1
                        if not limit:
                            Q.put({"stop_rendering": "1"})
                else:
                    Q.put({"msg": "indexing...{} activities"
                           .format(len(activities_list))})

                # Yield so that other greenlets get a chance to run.
                gevent.sleep(0)

        except Exception as e:
            Q.put({"error": str(e)})

        else:
            Q.put({"msg": "done indexing {} activities."
                   .format(len(activities_list))})

            if activities_list:
                activity_index = (pd.DataFrame(activities_list)
                                  .set_index("beginTimestamp")
                                  .sort_index(ascending=False)
                                  .astype(dtypes))

                app.logger.debug(
                    "done with indexing for {}".format(user))
                dt_last_indexed = datetime.utcnow()

                packed = activity_index.to_msgpack(compress='blosc')
                cache.set(user.index_key(),
                          (dt_last_indexed, packed),
                          CACHE_INDEX_TIMEOUT)

                app.logger.info("cached {}, size={}".format(
                    user.index_key(), len(packed)))
            else:
                # An empty list has no "beginTimestamp" column, so
                # set_index would raise KeyError here, outside the reach of
                # the except clause above.  There is nothing to cache.
                app.logger.info(
                    "no activities to index for {}".format(user))

        finally:
            user.indexing(False)
            # Last item on the queue; presumably the consumer treats it as
            # the end-of-stream sentinel -- confirm against the reader of Q.
            Q.put(StopIteration)

    P.apply_async(async_job, [self, limit, after, before])
    return Q
def cache(self, identifier=None, timeout=CACHE_USERS_TIMEOUT):
    """Store this user object in the cache and return it.

    Args:
        identifier: value passed to `User.key` to build the cache key;
            `self.strava_id` is used when this is falsy.
        timeout: expiry passed to `cache.set` (reported as seconds in the
            log message).

    Returns:
        self, so the call can be chained.
    """
    # Inside the method body the name `cache` resolves to the module-level
    # cache object, not to this method.
    lookup_id = identifier if identifier else self.strava_id
    cache_key = User.key(lookup_id)

    cache.set(cache_key, self, timeout)

    message = "cached {} with key '{}' for {} sec".format(
        self, cache_key, timeout)
    app.logger.debug(message)

    return self