Exemplo n.º 1
0
def get_strava_api(secret, ID):
    """Download the streams of every activity and save them to disk.

    For each activity a DataFrame is built with one column per stream listed
    in the module-level ``types`` that Strava returned, plus the activity
    id/name/type, ``lat``/``lon`` split out of ``latlng`` and a ``time``
    column computed as ``distance / velocity_smooth``. The list of frames is
    pickled to ``save_file + '.pkl'`` and their concatenation is written to
    ``save_file + '.csv'`` (``save_file`` is a module-level name).

    Args:
        secret: Strava access token.
        ID: athlete id passed to ``client.get_athlete``.

    Returns:
        list of pandas.DataFrame, one per activity.
    """
    all_act = []
    client = Client(access_token=secret)
    tot = total_num(client)

    # NOTE(review): the athlete object is never used below; the request is
    # kept so the sequence of API calls is unchanged - confirm it is needed.
    me = client.get_athlete(ID)
    activities = client.get_activities()

    for _ in trange(tot):
        # next() works on both Python 2 and 3 iterators (was .next()).
        _a = next(activities)
        _streams = client.get_activity_streams(_a.id, types=types)

        df = pd.DataFrame()
        for item in types:
            if item in _streams.keys():
                df[item] = pd.Series(_streams[item].data, index=None)

        # Scalar columns are added only after the stream columns exist:
        # assigning them to a still-empty frame fixes an empty index and
        # every stream column added afterwards would be aligned to nothing.
        df['act_id'] = _a.id
        df['act_name'] = _a.name
        df['act_type'] = _a.type

        if 'latlng' in df:
            # Materialise the map so a real column is stored on Python 3.
            df['lat'] = list(map(split_lat, df['latlng']))
            df['lon'] = list(map(split_long, df['latlng']))
        if 'distance' in df and 'velocity_smooth' in df:
            df['time'] = df['distance'] / df['velocity_smooth']

        # fillna returns a new frame; the original discarded the result.
        df = df.fillna(0)
        all_act.append(df)

    with open(save_file + '.pkl', 'wb') as fp:
        pickle.dump(all_act, fp)

    pd.concat(all_act, ignore_index=True).to_csv(save_file + '.csv')

    return all_act
Exemplo n.º 2
0
def home(request):
    """Sync the logged-in athlete's new Strava activities and render home.

    Redirects to the ``auth`` view when the session holds no access token.
    Otherwise the athlete is looked up (or created) in the database, the
    activities newer than the latest stored one are downloaded, every
    non-manual activity that has a ``latlng`` stream is saved, and
    ``home.html`` is rendered with the athlete and the downloaded activities.
    """
    if 'access_token' not in request.session:
        return redirect('auth')

    # Retrieve the athlete's data from the API.
    client = Client(access_token=request.session.get('access_token'))
    athlete = client.get_athlete()

    # Create the database row on the athlete's first visit.
    try:
        db_athlete = Athlete.objects.get(id=athlete.id)
    except Athlete.DoesNotExist:
        db_athlete = Athlete.create(athlete)
        db_athlete.save()

    # Only fetch what is newer than the latest stored activity, if any.
    try:
        latest_activity = Activity.objects.filter(athlete=db_athlete).latest('date')
    except Activity.DoesNotExist:
        fetched = client.get_activities()
    else:
        fetched = client.get_activities(after=latest_activity.date)

    # The client returns a lazy iterator. Materialise it once so the loop,
    # the template and the length all see the same items without the
    # iterator being consumed (or the API queried) more than once.
    activities = list(fetched)

    # Add the new activities to the database.
    for activity in activities:
        if activity.manual:
            continue
        streams = client.get_activity_streams(
            activity.id, types=['latlng'], resolution='medium')
        stream = streams.get('latlng') if streams else None
        if stream is None:
            # No GPS track was returned for this activity; nothing to store.
            continue
        db_activity = Activity.create(activity, db_athlete, stream.data)
        db_activity.save()

    return render(request, 'home.html', {
        'athlete': athlete,
        'activities': activities,
        'act_length': len(activities),
    })
Exemplo n.º 3
0
class StravaData:
    """Thin convenience wrapper around an authenticated Strava ``Client``."""

    # Class-level defaults; each instance overwrites them in __init__.
    RUNNER_ID = None
    ACCESS_TOKEN = None
    client = None

    def __init__(self, runner_id, access_token):
        """Remember the runner id and token and build the API client."""
        api_client = Client()
        api_client.access_token = access_token
        self.client = api_client
        self.ACCESS_TOKEN = access_token
        self.RUNNER_ID = runner_id

    def getAthlete(self):
        """Return whatever ``client.get_athlete()`` returns."""
        return self.client.get_athlete()

    def getActivities(self):
        """Return whatever ``client.get_activities()`` returns."""
        return self.client.get_activities()

    def getActivity(self, id):
        """Return the activity with the given id."""
        return self.client.get_activity(id)

    def getActivityStreams(self, id, types):
        """Return the requested stream types of an activity at 'medium' resolution."""
        return self.client.get_activity_streams(
            id, types=types, resolution='medium')
Exemplo n.º 4
0
def FetchGPSData(tokensFile,CPCdate,CPClen):
    """Return the GPS track of the Strava activity matching a CPC sample.

    Every access token in ``tokensFile`` is searched for activities whose
    local start time is less than 60 seconds from ``CPCdate``. Among those
    candidates the one whose elapsed time is closest to ``CPClen`` is chosen;
    its end time must also be less than 60 seconds from the CPC end time.

    Args:
        tokensFile: path of a text file holding one Strava access token per line.
        CPCdate: datetime of the CPC sample start; it is compared with each
            activity's ``start_date_local``.
        CPClen: length of the CPC sample in seconds.

    Returns:
        pandas.DataFrame with the columns ``lon``, ``lat`` and ``dateTime``.

    Raises:
        AssertionError: if no activity starts close enough to ``CPCdate`` or
            the chosen activity does not end close enough to the CPC end.
    """
    client = Client()
    # How the saved access tokens were obtained:
    #   1. Run
    #        authorize_url = client.authorization_url(client_id=22380, redirect_uri='http://sustainability.leeds.ac.uk', approval_prompt='force')
    #        print(authorize_url)
    #   2. Open that url in a browser, accept the request and copy the 'code'
    #      from the resulting url, together with the client_secret (found
    #      under the air pollution9 account on strava), into
    #        access_token = client.exchange_code_for_token(client_id=22380, client_secret='***', code='***')
    #   3. print(access_token) and store the value in the tokens file.
    with open(tokensFile, 'r') as f:
        myTokens = f.read().splitlines()

    # Collect every activity that starts within 1 minute of the CPC start.
    # A list of (token index, activity id) pairs is used so that several
    # matching activities of the same athlete are all kept (a dict keyed by
    # token index silently retained only the last one).
    validActs = []
    for i, token in enumerate(myTokens):
        client.access_token = token
        for activity in client.get_activities():
            startDate = activity.start_date_local
            if abs((CPCdate - startDate).total_seconds()) < 60:
                validActs.append((i, activity.id))
    # Raised explicitly (not via `assert`) so the check survives `python -O`;
    # the message now states the 1-minute window the code actually applies.
    if not validActs:
        raise AssertionError("No GPS activities with a start time within 1 minute of the CPC data file start time")

    # Choose the candidate whose elapsed time is closest to the CPC length;
    # on a tie the first candidate wins.
    DeltaT = None
    chosenAth = chosenAct = chosenActivity = None
    for key, value in validActs:
        client.access_token = myTokens[key]
        activity = client.get_activity(value)
        # total_seconds() also counts whole days, unlike .seconds.
        elap = int(activity.elapsed_time.total_seconds())
        thisDT = abs(CPClen - elap)
        if DeltaT is None or thisDT < DeltaT:
            DeltaT = thisDT
            chosenAth = key
            chosenAct = value
            chosenActivity = activity

    # Extract the required data from the chosen activity. The activity
    # fetched in the loop above is reused instead of being downloaded again.
    client.access_token = myTokens[chosenAth]
    startDate = chosenActivity.start_date_local
    endDate = startDate + dt.timedelta(
        seconds=int(chosenActivity.elapsed_time.total_seconds()))
    endDateCPC = CPCdate + dt.timedelta(seconds=CPClen)
    if not abs((endDateCPC - endDate).total_seconds()) < 60:
        raise AssertionError("No valid GPS activities with an end time within 1 minute of the CPC data file end time")

    myTypes = ['time', 'latlng']
    myStream = client.get_activity_streams(chosenAct, types=myTypes)
    latlon = myStream['latlng'].data
    lat = [point[0] for point in latlon]
    lon = [point[1] for point in latlon]
    # The 'time' stream holds offsets in seconds from the activity start.
    time = myStream['time'].data
    dateTime = [startDate + dt.timedelta(seconds=offset) for offset in time]
    GPSData = pd.DataFrame(data={'lon': lon, 'lat': lat, 'dateTime': dateTime})
    return GPSData
Exemplo n.º 5
0
def main():
    """Entry point of the cycle mapping program.

    Fetches at most one activity, prints the keys of its stream dictionary
    and finally prints a ``{activity name: activity id}`` mapping.
    """
    client = Client(access_token="ACCESS_TOKEN_HERE ")
    wanted_streams = ['time', 'latlng', 'altitude', 'heartrate', 'temp']
    act_dict = {}
    for activity in client.get_activities(limit=1):
        act_dict[activity.name] = activity.id
        streams = client.get_activity_streams(
            activity.id, types=wanted_streams, resolution='medium')
        print(streams.keys())
    print(act_dict)
Exemplo n.º 6
0
def strava_last_activity(request):
    """Render the average cadence of the user's most recent Strava activity.

    The latest activity before "now" is looked up with the access code stored
    on the user's profile, its detailed record is downloaded and
    ``average_cadence`` is handed to ``strava_last_activity.html``. When the
    athlete has no activity the template receives ``None`` instead of the
    view failing on an unbound variable.
    """
    currentUser = User.objects.get(pk=request.user.id)

    client = Client()
    client.access_token = currentUser.profile.stravaAccessCode

    # Default for the "no activities" case; the loop body may never run.
    averageCadence = None
    activities = client.get_activities(before=datetime.datetime.now(), limit=1)
    for activity in activities:
        # The time/watts streams were previously downloaded here and never
        # used; that request has been dropped.
        averageCadence = client.get_activity(activity.id).average_cadence

    # NOTE(review): the context key is 'activity_name' but the value is the
    # average cadence - confirm against the template which one is intended.
    return render(request, 'strava_last_activity.html',
                  {'activity_name': averageCadence})
Exemplo n.º 7
0
def get_data():
    """Dump every Strava activity, plus selected streams, to a CSV file.

    The access token is read from the ``access_token`` environment variable
    after loading the project's ``.env`` file. Each row is the activity's
    ``to_dict()`` extended with one column per entry of ``types``; a column
    is ``None`` when that stream was not returned. The header is taken from
    the first activity.
    """
    load_dotenv(find_dotenv())
    # The token is expected to have been obtained beforehand through the
    # OAuth flow: client.authorization_url(...) followed by
    # client.exchange_code_for_token(...).
    client = Client(access_token=os.getenv("access_token"))
    activities = client.get_activities()

    # Columns written to the CSV.
    types = ['time', 'latlng', 'altitude', 'heartrate', 'temp']
    # Streams actually requested from the API.
    # NOTE(review): only 'heartrate' is requested, so the other columns in
    # `types` will be None unless Strava returns them anyway - confirm that
    # this is intended.
    stream_types = ['heartrate']

    out_path = os.path.join("/home/greg/repos/commute_analysis", "data", "raw",
                            'raw_strava_data.csv')
    writer = None
    # newline='' is what the csv module requires of files it writes to;
    # without it extra blank rows appear on platforms that translate '\n'.
    with open(out_path, 'w', newline='') as f:
        for activity in activities:
            # Treat "no streams returned" as an empty mapping.
            streams = client.get_activity_streams(activity.id,
                                                  types=stream_types,
                                                  resolution='medium') or {}
            temp = activity.to_dict()
            for k in types:
                temp[k] = streams[k].data if k in streams else None
            if writer is None:
                # The first activity defines the header.
                writer = csv.DictWriter(f, temp.keys())
                writer.writeheader()
            writer.writerow(temp)
Exemplo n.º 8
0
# Follow this url and grab the code it gives you.
print(url)


# new cell
code = "GRAB THE CODE FROM THE URL REDIRECT ABOVE"
access_token = client.exchange_code_for_token(client_id=MY_STRAVA_CLIENT_ID,
                                              client_secret=MY_STRAVA_CLIENT_SECRET,
                                              code=code)

activity_id = 123 # any activity (you can easily get a list of recents from client)
activity = client.get_activity(activity_id).to_dict()

types = ['time', 'latlng']
stream = client.get_activity_streams(activity_id, types=types)
print(activity["type"])

# Activities recorded without gear have no gear record; fall back to None
# instead of failing while subscripting it.
shoes = (activity.get("gear") or {}).get("name")
elapsed_time = activity["elapsed_time"]
moving_time = activity["moving_time"]
distance_miles = activity["distance"] * 0.000621371 # meters to miles
average_heartrate = activity["average_heartrate"]
max_heartrate = activity["max_heartrate"]
start_date_local = datetime.strptime(activity["start_date_local"], "%Y-%m-%dT%H:%M:%S")
name = activity["name"]
description = activity["description"]
calories = activity["calories"]

print(f"""
shoes:\t\t\t{shoes}
Exemplo n.º 9
0
''' Script to grab activities from strava '''

import sys
import stravalib
from stravalib.client import Client
# SafeConfigParser was a deprecated alias of ConfigParser and no longer
# exists in Python 3.12; ConfigParser is the drop-in replacement.
from configparser import ConfigParser

config = ConfigParser()
config.read('config.yaml')

client = Client()
authorize_url = client.authorization_url(
    client_id=config.get('strava', 'client_id'),
    redirect_uri='http://localhost:8282/authorized')
# Extract the code from your webapp response
# access_token = client.exchange_code_for_token(client_id=config.get('strava', 'client_id'), client_secret=config.get('strava', 'client_secret'), code=config.get('strava', 'code'))

# Now store that access token somewhere (a database?)
client.access_token = config.get('strava', 'Bearer')
athlete = client.get_athlete()

activities = client.get_activities(after="2017-11-17T00:00:00Z", limit=15)
activity_data = []
for activity in activities:
    activity_stream = client.get_activity_streams(activity.id,
                                                  types=['latlng', 'distance'])
    # Activities without a GPS track have no 'latlng' stream; skip them
    # instead of failing with KeyError.
    if activity_stream and 'latlng' in activity_stream:
        activity_data.append(activity_stream['latlng'].data)
Exemplo n.º 10
0
def read_strava(activity_id,
                access_token,
                refresh_token=None,
                client_id=None,
                client_secret=None,
                to_df=False,
                **kwargs):
    """
    Load the activity data from Strava into a pandas DataFrame or a
    runpandas Activity.

    Column names are converted with ``utils.camelcase_to_snakecase``.
    The index is replaced by time offsets relative to the first record.
    Columns that contain only NaN values are removed.

    Attention: Two API requests are made to the Strava webservice: 1 to
               retrieve activity metadata, 1 to retrieve the raw data ("streams").

    Parameters
    ----------
        activity_id : str, The id of the activity
        access_token: str, The Strava access token
        refresh_token: str, The Strava refresh token, optional
        client_id: int, The Strava client id, optional
             (accepted but not used by this function)
        client_secret: str, The Strava client secret, optional
             (accepted but not used by this function)
        to_df : bool, optional
             Return a :obj:`pandas.DataFrame` if `to_df=True`, otherwise
             a :obj:`runpandas.Activity` will be returned. Defaults to False.
        **kwargs :
             Extra keyword arguments; accepted but not used by this function.

    Returns
    -------
    Return a :obj:`pandas.DataFrame` if `to_df=True`, otherwise
             a :obj:`runpandas.Activity` will be returned.
    """

    client = Client()
    client.access_token = access_token
    client.refresh_token = refresh_token

    # Request 1: metadata (only the local start time is needed).
    activity = client.get_activity(activity_id)
    start_datetime = activity.start_date_local

    # Request 2: the raw streams.
    streams = client.get_activity_streams(activity_id=activity_id,
                                          types=STREAM_TYPES,
                                          series_type="time")

    data = pd.DataFrame(gen_records(streams))

    times = data.pop("time")
    data.columns = [utils.camelcase_to_snakecase(col) for col in data.columns]

    def time_to_datetime(time):
        return start_datetime + timedelta(seconds=time)

    timestamps = times.apply(time_to_datetime)
    # Positional access: the first record, whatever the index labels are.
    start = timestamps.iloc[0]
    # The offsets are already timedeltas, so no `unit` is needed (the
    # argument is deprecated in recent pandas and had no effect here).
    data.index = TimedeltaIndex(timestamps - start, name="time")
    data.dropna(axis=1, how="all", inplace=True)

    if to_df:
        return data

    return Activity(data, cspecs=COLUMNS_SCHEMA, start=start)
Exemplo n.º 11
0

# Download the activities recorded after `from_date` and, for every run
# that has heart-rate data, store its streams as JSON.
# print(...) with a single argument and dict.items() behave the same on
# Python 2 and Python 3, unlike the print statement and iteritems().
print("Downloading activities from {0:%d %b %Y}".format(from_date))
acts = client.get_activities(after=from_date)

for act in acts:
    total += 1

    if act.type != "Run" or act.average_heartrate is None:
        continue

    count += 1

    # Get the full data streams
    streams = client.get_activity_streams(act.id, types=stream_filter)
    sdf = pd.DataFrame({stype: stream.data for stype, stream in streams.items()})

    if "latlng" in stream_filter:
        # Split the (lat, lng) pairs into two scalar columns.
        sdf["lat"] = [a[0] for a in sdf.latlng]
        sdf["lng"] = [a[1] for a in sdf.latlng]
        del sdf["latlng"]

    detail_fname = join(output_detail_dir, "{0}.json".format(act.id))
    sdf.to_json(detail_fname)

    # with open(join(output_detail_dir, "{0}.p".format(act.id)), "wb") as f:
    #     pickle.dump(sdf, f, 2)

    print("{0} on {1:%d %b %Y} [kudos {2}]".format(act.name, act.start_date, act.kudos_count))
    print("\tHR: {0}".format(act.average_heartrate))
Exemplo n.º 12
0
#print(ali)

# Print a one-line summary of the ten most recent activities.
acts = client.get_activities(before=None, after=None, limit=10)

for act in acts:
    summary = (act.id, act.name, unithelper.miles(act.distance), act.description)
    print(*summary)

#testing
#ryan = client.get_athlete(6777976)

#print ryan.city

# An activity can have many streams; request the desired types in one call.
types = ['time', 'latlng', 'altitude', 'heartrate', 'temp']

streams = client.get_activity_streams(1179030205, types=types)

#  This returns initial data for start of activity via streams
# print('streams:')
# print(streams)
# print('kvs:')
# for key, value in streams.items():
#     print(key + ' has ' + str(value.original_size) + ' original data points')
#     points = value.data
#     print(points[0])
Exemplo n.º 13
0
    activity_ids.append(ra.id)

# NOTE: this fragment uses Python 2 print statements.
print activity_ids

# For every collected id: download the activity, print who recorded it and
# whether it looks like a summary or a detailed record.
for activity_id in activity_ids:

    act = client.get_activity(activity_id)
    print act.name, act.type, act.athlete.firstname, act.athlete.lastname
    # An activity with neither gear nor calories is reported as a summary.
    if (act.gear == None and act.calories == None):
        print 'Summary Activity'
    else:
        print 'Detailed Activity'
    
    # Disabled stream analysis; the body below never runs.
    if False:
        #act_streams = client.get_activity_streams(activity_id, types=['velocity_smooth','grade_smooth'], resolution='high')
        act_streams = client.get_activity_streams(activity_id, types=['velocity_smooth','grade_smooth'])
        print (act_streams)
        vs = act_streams['velocity_smooth']
        gs = act_streams['grade_smooth']
        # NOTE(review): 'distance' is not among the requested types; this
        # lookup may raise KeyError unless the API returns it anyway - confirm.
        dst = act_streams['distance']
        #print (vs.data)
        # vs_sorted is the same list object as vs.data, so sort() reorders
        # the stream's data in place.
        vs_sorted = vs.data
        vs_sorted.sort()
        #print vs_sorted
        
        # NOTE(review): `percentile` is defined elsewhere; the second
        # argument is presumably a fraction in [0, 1] - confirm.
        p_25th = percentile(vs_sorted, 0.25)
        p_50th = percentile(vs_sorted, 0.50)
        p_75th = percentile(vs_sorted, 0.75)
        p_90th = percentile(vs_sorted, 0.9)
        
        print 'Activity ID: {}'.format(activity_id)
Exemplo n.º 14
0
                                             for x in ('Pedalada', 'Ciclismo',
                                                       'Vuelta ciclista',
                                                       'Ride', 'almuerzo')):
                                client.update_activity(activity.id,
                                                       private=d['private'],
                                                       commute=d['commute'],
                                                       name=d['name_back'])
                            prev_activity = activity
                            current_activity_processed = True

                    # Paso por lugar configurado
                    for p in k['pass_through_locations']:
                        # Solo act de mas de 20k
                        if activity.distance and int(
                                activity.distance) > 20000:
                            streams = client.get_activity_streams(
                                activity.id, ['latlng'], 'medium')
                            if 'latlng' in streams.keys():
                                if pass_through_zone(p['latlng'],
                                                     streams['latlng'].data,
                                                     precision=0.3):
                                    print(p['name'], end="")
                                    print(
                                        u": [{0.suffer_score}] \"{0.name}\" {0.moving_time} {0.distance} {0.start_date_local} https://www.strava.com/activities/{0.id}"
                                        .format(activity))
                                    if write and any(
                                            x in activity.name
                                            for x in ('Pedalada', 'Ciclismo',
                                                      'Vuelta ciclista',
                                                      'Ride', 'almuerzo')):
                                        client.update_activity(activity.id,
                                                               name=p['name'])
Exemplo n.º 15
0
resolution = 'high'
types = ['time', 'altitude', 'heartrate', 'temp', 'distance', 'watts']

access_token = client.exchange_code_for_token(client_id=client_id,
                                              client_secret=client_secret,
                                              code=code)

client = Client(access_token=access_token)
activities = dict()

# One overview row per activity. The rows are collected first and the frame
# is built once at the end: DataFrame.append no longer exists in pandas 2.x
# and copied the whole frame on every call.
_overview_rows = []
_overview_ids = []

for activity in client.get_activities(
        after='{}-01-01T00:00:00Z'.format(str(year)),
        before='{}-01-01T00:00:00Z'.format(str(year + 1))):
    streams = client.get_activity_streams(activity.id,
                                          types=types,
                                          series_type='time',
                                          resolution=resolution)
    # Keep only the raw data of every stream; "no streams returned" is
    # treated as an empty mapping.
    streams = {key: value.data for key, value in (streams or {}).items()}

    _overview_rows.append({
        'Name': activity.name,
        'Date': activity.start_date_local,
        'Durasi [min]': int(activity.moving_time.seconds / 60),
        'Jarak [m]': round(activity.distance.num, 1),
        'Measurements': list(streams.keys())
    })
    _overview_ids.append(activity.id)
    activities[activity.id] = pd.DataFrame(streams)

# Overview table indexed by activity id.
df_overview = pd.DataFrame(_overview_rows, index=_overview_ids)
writer = pd.ExcelWriter('strava_export_{}.xlsx'.format(str(year)),
Exemplo n.º 16
0
        "follower": null
    }
}"""

from stravalib.client import Client

# NOTE(security): a literal access token is committed here. It should be
# revoked and loaded from the environment or a config file instead.
client = Client(access_token="ff95a3c12476ca93a68825a38688dc72fb164e62 ")

# get_activities() is lazy; materialise it once so the first activity can be
# referenced repeatedly without consuming or re-querying the iterator.
activities = list(client.get_activities(limit=10))

run = client.get_activity(activities[0].id)

# Activities can have many streams, you can request n desired stream types
types = ['time', 'latlng', 'altitude', 'distance']

streams = client.get_activity_streams(activities[0].id,
                                      types=types,
                                      resolution='high')

# The result is a dictionary keyed by stream type.

heights = streams['altitude'].data
cords = streams['latlng'].data
times = streams['time'].data
distances = streams['distance'].data

import csv
from datetime import datetime

pacePoints = 0
paceSum = 0
paces = [[]]
Exemplo n.º 17
0
def read_strava(
    activity_id: int,
    access_token: str,
    refresh_token: str = None,
    client_id: int = None,
    client_secret: str = None,
    resample: bool = False,
    interpolate: bool = False,
) -> pd.DataFrame:
    """Retrieve the data of one Strava activity as a data frame.

    Stream names found in ``COLUMN_TRANSLATIONS`` are renamed accordingly and
    the ``latlng`` stream is split into ``latitude`` and ``longitude``.
    Two calls are made to the Strava API: one for the activity metadata and
    one for the raw data ("streams").

    Args:
        activity_id: The id of the activity
        access_token: The Strava access token
        refresh_token: The Strava refresh token. Optional.
        client_id: The Strava client id. Optional. Not used in this function.
        client_secret: The Strava client secret. Optional. Not used in this function.
        resample: passed on to ``resample_data``
        interpolate: passed on to ``resample_data``

    Returns:
        The result of ``resample_data`` applied to a data frame indexed by
        ``datetime`` (local activity start plus each sample's time offset).
    """
    client = Client()
    client.access_token = access_token
    client.refresh_token = refresh_token

    start_datetime = client.get_activity(activity_id).start_date_local

    streams = client.get_activity_streams(
        activity_id=activity_id,
        types=STREAM_TYPES,
        series_type="time",
    )

    raw_data = {}
    for stream_name, stream in streams.items():
        if stream_name != "latlng":
            # Rename the stream when a translation exists, keep it otherwise.
            try:
                column = COLUMN_TRANSLATIONS[stream_name]
            except KeyError:
                column = stream_name
            raw_data[column] = stream.data
            continue

        # Split the (lat, lng) pairs into two parallel columns.
        latitude, longitude = list(zip(*stream.data))
        raw_data["latitude"] = latitude
        raw_data["longitude"] = longitude

    data = pd.DataFrame(raw_data)

    # Turn every offset in seconds into an absolute local timestamp.
    data["datetime"] = data["time"].apply(
        lambda offset: start_datetime + timedelta(seconds=offset)
    )
    data = data.drop(["time"], axis="columns").set_index("datetime")

    return resample_data(data, resample, interpolate)
Exemplo n.º 18
0
    strava_data['start_date_local'] = pd.to_datetime(
        strava_data['start_date_local'])
    # Only fetch what is newer than the latest stored activity. The date has
    # to be passed as `after`; positionally it would be taken as `before`.
    activities = client.get_activities(
        after=strava_data['start_date_local'].max())
    new_rows = [activity.to_dict() for activity in activities]
    if new_rows:
        # Bind the combined frame; the previous append() result was thrown
        # away, so nothing was ever added.
        strava_data = pd.concat([strava_data, pd.DataFrame(new_rows)],
                                ignore_index=True)
    strava_data.to_csv('raw_strava_data.csv')
else:
    activities = client.get_activities()
    # Columns written to the CSV.
    types = ['time', 'latlng', 'altitude', 'heartrate', 'temp']
    # Streams actually requested from the API.
    # NOTE(review): only 'heartrate' is requested, so the other columns in
    # `types` will be None unless Strava returns them anyway - confirm.
    #stream_types = ['time', 'latlng', 'altitude', 'heartrate', 'temp']
    stream_types = ['heartrate']
    w = None
    # newline='' is what the csv module requires of files it writes to.
    with open('raw_strava_data.csv', 'w', newline='') as f:
        for activity in activities:
            # Treat "no streams returned" as an empty mapping.
            streams = client.get_activity_streams(activity.id,
                                                  types=stream_types,
                                                  resolution='medium') or {}
            temp = activity.to_dict()
            for k in types:
                temp[k] = streams[k].data if k in streams else None
            if w is None:
                # The first activity defines the header.
                w = csv.DictWriter(f, temp.keys())
                w.writeheader()
            w.writerow(temp)

# Get datetime from string. This is for later for updating the data
# t = datetime.strptime(str, '%Y-%m-%dT%H:%M:%S')
Exemplo n.º 19
0
subplt = plt.subplot(111)
fig.subplots_adjust(left=0.03, bottom=0.03, right=0.97, top=0.97)
m = Basemap(projection='lcc',
            resolution='h',
            width=basemap_width,
            height=basemap_height,
            lat_0=chicago_lat,
            lon_0=chicago_lon,
            ax=subplt,
            fix_aspect=False)

run_name = 'Chicago Marathon'
run = next((i for i in runs if i.name == run_name), None)
if run is None:
    raise LookupError("No run named {!r} was found".format(run_name))

r = client.get_activity_streams(run.id, types=['latlng'], resolution='medium')
if not r or 'latlng' not in r:
    # Without a GPS track there is nothing to animate; fail here with a
    # clear message rather than later on an empty placeholder.
    raise ValueError("Run {!r} has no 'latlng' stream".format(run.name))
lats, lngs = zip(*r['latlng'].data)
# Project the coordinates to map x/y.
x, y = m(lngs, lats)
plot_data = {'name': run.name, 'x': x, 'y': y}

# Interpolate the X/Y data to achieve the desired number of points
# necessary for framerate. The upper sample index is the last real point of
# the track rather than a fixed 999, so shorter tracks are covered as well.
xdata = np.interp(np.linspace(0, len(plot_data['x']) - 1, frames),
                  *list(zip(*enumerate(plot_data['x']))))
ydata = np.interp(np.linspace(0, len(plot_data['y']) - 1, frames),
                  *list(zip(*enumerate(plot_data['y']))))

frame_size = 750
subplt.set_xlim(xdata[0] - frame_size, xdata[0] + frame_size)
subplt.set_ylim(ydata[0] - frame_size, ydata[0] + frame_size)
Exemplo n.º 20
0
class strava_data_cache:
    """Local cache of Strava activities (CSV file) and streams (HDF5 file).

    ``get_activities`` keeps ``activities_file`` up to date and loads it into
    ``activites_df``; ``get_streams`` downloads the streams of activities
    that are not yet in ``streams_file`` and appends them to its 'streams'
    table. ``setup_client`` must be called before either of them.
    """

    # Integer codes stored in the 'type' column of the streams table.
    Run         = 1
    Walk        = 2
    Hike        = 3
    Ride        = 4
    Swim        = 5
    Workout     = 6
    VirtualRide = 7
    AlpineSki   = 8

    # Stream types requested from the API.
    stream_types = ['time', 'latlng', 'distance', 'altitude', 'velocity_smooth',
                    'heartrate', 'cadence', 'watts', 'temp', 'moving', 'grade_smooth']

    # Columns every stored stream frame is padded to ('latlng' is split
    # into 'lat' and 'lon').
    stream_cols = ['time', 'distance', 'altitude', 'velocity_smooth',
                   'heartrate', 'cadence', 'watts', 'temp', 'moving', 'grade_smooth', 'lat', 'lon']

    def __init__(self):
        self.token_file = 'token.txt'
        self.activities_file = 'activities.csv'
        self.streams_file = 'streams.h5'
        self.activites_df = pd.DataFrame()
        self.client = None

    def setup_client(self):
        """Create the API client from ``token_file`` or from a prompted token."""
        if os.path.exists(self.token_file):
            with open(self.token_file, "r") as f:
                # Drop the trailing newline readline() keeps.
                token = f.readline().strip()
        else:
            token = input('Enter access token: ').strip()
        self.client = Client(access_token=token)
        self.client.get_athlete() # Get current athlete details

    def get_activities(self):
        """Download new activities and merge them into ``activities_file``.

        Only activities after the ``start_date`` of the last row already in
        the file are requested. Distances are converted to miles.
        """
        if not os.path.exists(self.activities_file):
            date = "1990-01-01T00:00:00Z"
            old_df = pd.DataFrame()
        else:
            old_df = pd.read_csv(self.activities_file)
            date = pd.to_datetime(old_df.iloc[-1]['start_date']).strftime('%Y-%m-%dT%H:%M:%SZ')
        activities = list(self.client.get_activities(after=date))
        if activities:
            # One row per activity holding the attributes listed below.
            attributes = ['achievement_count', 'athlete_count', 'average_cadence', 'average_heartrate', 'average_speed',
                          'average_temp', 'average_watts', 'best_efforts', 'calories', 'comment_count', 'commute', 'description',
                          'device_name', 'device_watts', 'distance', 'elapsed_time', 'elev_high', 'elev_low', 'embed_token',
                          'external_id', 'flagged', 'gear', 'gear_id', 'guid', 'has_heartrate', 'has_kudoed', 'id',
                          'instagram_primary_photo', 'kilojoules', 'kudos_count', 'location_city', 'location_country',
                          'location_state', 'manual', 'max_heartrate', 'max_speed', 'max_watts', 'moving_time', 'name',
                          'partner_brand_tag', 'partner_logo_url', 'photo_count', 'photos', 'pr_count', 'private', 'resource_state',
                          'segment_efforts', 'splits_metric', 'splits_standard', 'start_date', 'start_date_local', 'start_latitude',
                          'start_longitude', 'suffer_score', 'timezone', 'total_elevation_gain', 'total_photo_count', 'trainer', 'type',
                          'upload_id', 'weighted_average_watts', 'workout_type']
            new_df = pd.DataFrame([{fn: getattr(act, fn) for fn in attributes} for act in activities])
            new_df['distance'] = new_df['distance'] / 1609 #convert distances from meters to miles
            if len(old_df) == 0:
                self.activites_df = new_df
            else:
                self.activites_df = pd.concat([old_df, new_df])
            self.activites_df.to_csv(self.activities_file, encoding='utf-8')
        else:
            self.activites_df = old_df
        print("Number of activities: ", len(self.activites_df))

    def get_streams(self,maxNum = float("inf")):
        """Download the streams of activities not yet in ``streams_file``.

        Args:
            maxNum: rows whose index label is greater than this value stop
                the loop.
        """
        # Load the activities first if that has not happened yet.
        if len(self.activites_df) == 0:
            self.get_activities()

        # Activity type name -> integer code.
        type_map = pd.Series([self.Run, self.Ride, self.Swim, self.Walk, self.Workout, self.VirtualRide, self.Hike, self.AlpineSki],
                             index=['Run', 'Ride', 'Swim', 'Walk',
                                    'Workout', 'VirtualRide', 'Hike', 'AlpineSki'])

        with pd.HDFStore(self.streams_file, 'a') as store:
            try:
                stream_ids = store['streams']['id'].unique()
            except Exception:
                # Best effort: when the table cannot be read (e.g. it does
                # not exist yet) nothing is treated as already stored.
                stream_ids = []
            for idx, row in self.activites_df.iterrows():
                if idx > maxNum:
                    break
                act_id = row['id']
                # row.name would be the row's index label, not the 'name'
                # column, so the column is read by key.
                act_name = row['name']
                if act_id in stream_ids:
                    continue
                print("Processing ID: {} Name: {}".format(act_id, act_name))
                try:
                    stream = self.client.get_activity_streams(act_id, types=self.stream_types)
                except Exception as exc:
                    print("Unexpected error getting:", repr(exc))
                    continue
                if not stream:
                    print("Skipping empty stream: %s" % act_name)
                    continue
                try:
                    temp_df = pd.DataFrame({i: stream[i].data for i in stream}, index=stream['time'].data)
                    if len(temp_df) == 0:
                        print("Skipping empty stream: %s" % act_name)
                        continue
                    if 'latlng' in temp_df.keys():
                        temp_df['lat'], temp_df['lon'] = list(zip(*temp_df["latlng"]))
                        temp_df = temp_df.drop('latlng', axis=1)
                    if 'distance' in temp_df.keys():
                        temp_df['distance'] = temp_df['distance'] / 1609
                    temp_df['type'] = row['type']
                    temp_df['id'] = act_id

                    # Pad to a common set of columns and dtypes so every
                    # append writes the same table layout. float("nan") is
                    # used because pd.np no longer exists in pandas 2.x.
                    for col in find_setdiff(self.stream_cols, temp_df.keys()):
                        temp_df[col] = float("nan")
                    for col in ('heartrate', 'cadence', 'velocity_smooth', 'watts'):
                        temp_df[col] = temp_df[col].astype('float')
                    temp_df['type'] = temp_df['type'].map(type_map)
                    store.append('streams', temp_df, data_columns=True, format='table', complib='blosc')
                except Exception as exc:
                    # Best effort per activity: report and go on with the next.
                    print("Unexpected error post get:", repr(exc))

    def simple_stream_test(self):
        """Print the client, the first activity id and that activity's streams."""
        print(self.client)
        print(self.activites_df.iloc[0].id)
        stream = self.client.get_activity_streams(self.activites_df.iloc[0].id, types=self.stream_types)
        print(stream)
Exemplo n.º 21
0
class stravaImporter(object):
    def __init__(self):
        """Set importer defaults and authenticate against Strava.

        Exchanges the module-level ``ACCESS_CODE`` (together with
        ``CLIENT_ID`` / ``CLIENT_SECRET``) for an access token, stores the
        token on the client, fetches the athlete and prints a confirmation.
        """
        # pause between API calls: 1.5 s, i.e. 40 requests per minute
        self.API_CALL_PAUSE_SECONDS = 1.5

        # name of tables in model
        self.user_TBL = 'users'
        self.activity_TBL = 'activities'
        self.streams_TBL = 'streams'
        self.gear_TBL = 'gear'

        # streams to extract from strava
        self.streams = [
            'time',
            'latlng',
            'distance',
            'altitude',
            'velocity_smooth',
            'heartrate',
            'cadence',
            'temp',
            'moving',
            'grade_smooth',
        ]

        # NOTE: the interactive self-authorization flow (building an
        # authorization URL and reading the `code` from the redirect) is NOT
        # working right now, so a hard-coded ACCESS_CODE is exchanged instead.
        self.client = Client()
        token = self.client.exchange_code_for_token(client_id=CLIENT_ID,
                                                    client_secret=CLIENT_SECRET,
                                                    code=ACCESS_CODE)
        # the token is only kept on the client (it could be persisted in a DB)
        self.client.access_token = token

        # retrieve the athlete
        self.athlete = self.client.get_athlete()
        print("For {}, I now have an access token".format(self.athlete.id))

    def get_activities(self, before=None, after=None, limit=None):
        """ Get activities and the related metadata from strava """
        return list(
            self.client.get_activities(before=before, after=after,
                                       limit=limit))

    def get_streams(self, activity_id):
        # download the entire stream: `resolution` = `all` (default)
        # download all stream_types except `power`
        try:
            s = self.client.get_activity_streams(activity_id,
                                                 types=self.streams)
            return s
        except:
            print('Could not get streams for activity {0}. Manual upload?'.
                  format(activity_id))
            return

    def stream_to_DF(self, s):
        """ Convert a Strava Stream to a pandas DF """
        return pd.DataFrame.from_dict({k: s[k].data for k in s.keys()})

    def add_user(self,
                 username,
                 email=None,
                 fname=None,
                 lname=None,
                 password=None):
        """ Add a user to the DB and return the new row's id.

        The user is created with the given fields plus `strava_id` taken
        from `self.athlete.id`.

        Returns:
            The id of the inserted user as an `int`.

        Raises:
            SQLAlchemyError: re-raised after rolling back and printing the
                error when the insert fails.
        """

        s = Session()
        try:
            u = User(username=username,
                     email=email,
                     fname=fname,
                     lname=lname,
                     password=password,
                     strava_id=self.athlete.id)
            s.add(u)
            s.commit()
            # read the id while the session is still open
            return int(u.id)
        except SQLAlchemyError as err:
            s.rollback()
            print('Error: \n', err)
            raise
        finally:
            # previously unreachable (it sat after `return` / `raise`), so the
            # session was never closed; `finally` runs on both paths
            s.close()

    def add_streams(self, user_id, s_id):
        """ Add Strava data streams for a given user_id and activity_id """

        # get the strava streams for that activity
        stream = self.get_streams(s_id)
        # convert the streams to a DF
        if stream is not None:
            s = Session()

            df = self.stream_to_DF(stream)

            # add `user_id` to the DF
            df['user_id'] = user_id
            # add `activity_id` to the DF
            df['activity_id'] = s.query(
                Activity.id).filter_by(strava_id=s_id.astype(str)).one()[0]

            try:
                df.to_sql(self.streams_TBL,
                          engine,
                          if_exists='append',
                          index=False)
                s.commit()
            except:
                s.rollback()
                print('Error: `add_streams` cannot write event to DB. \n')
                raise

            s.close()
        else:
            print('Error: Stream is empty for User {0}, Activity {1}'.format(
                user_id, s_id))
        return

    def add_activity(self,
                     user_id,
                     before=None,
                     after=None,
                     limit=None,
                     add_streams=True):
        """ Get & add a list of activities from strava.

        Fetches activities via `get_activities` (`before`, `after` and
        `limit` are forwarded), prepares them with `munge_activity`, tags
        them with `user_id` and appends them to the activities table.

        When `add_streams` is truthy, the streams of every fetched activity
        are added as well, pausing `self.API_CALL_PAUSE_SECONDS` before each
        request.

        Errors while writing the activities are printed and re-raised.
        """

        # get the list of activities from strava
        activities = self.get_activities(before=before,
                                         after=after,
                                         limit=limit)
        # transform activities to a DF ready for Postgres
        df = self.munge_activity(activities)
        df['user_id'] = user_id

        s = Session()
        try:
            df.to_sql(self.activity_TBL,
                      engine,
                      if_exists='append',
                      index=False)
            s.commit()
            print('Added {0} activities from Strava.\n'.format(
                len(df.strava_id)))
        except Exception:
            s.rollback()
            print('Error: `add_activity` cannot write event to DB. \n')
            raise
        finally:
            # close the session on failure too, not only on success
            s.close()

        # if needed, add the streams as well
        if add_streams:
            size = len(df.strava_id)
            # count from 1 so the progress line reads "1 of N" ... "N of N"
            for (i, strava_id) in enumerate(df.strava_id, start=1):
                print('Fetching data streams for {0}: {1} of {2}'.format(
                    strava_id, i, size),
                      end='\r')
                time.sleep(
                    self.API_CALL_PAUSE_SECONDS)  # limit API call to 40 / min

                self.add_streams(user_id, strava_id)
            print('Added `Streams` for {0} activities from Strava.'.format(
                size))

        return

    def strip_units(self, df, cols):
        """ strip units from columns -- changes dtype from `object` to `float`"""

        d = {
            'average_speed': ' m / s',
            'max_speed': ' m / s',
            'distance': ' m',
            'total_elevation_gain': ' m'
        }

        for col in cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col].astype('str').str.strip(
                    d[col]),
                                        errors='coerce')

        return df

    def munge_activity(self, activities):
        """ Get `activities` ready for Postgres DB """

        # `stravalib`.`activity` attributes to import
        fields = [
            'athlete_count', 'average_cadence', 'average_heartrate',
            'average_speed', 'distance', 'elapsed_time', 'elev_high',
            'elev_low', 'end_latlng', 'external_id', 'gear_id',
            'has_heartrate', 'id', 'location_city', 'location_country',
            'location_state', 'manual', 'max_heartrate', 'max_speed',
            'moving_time', 'name', 'pr_count', 'start_date',
            'start_date_local', 'start_latitude', 'start_latlng',
            'start_longitude', 'suffer_score', 'timezone',
            'total_elevation_gain', 'type', 'upload_id', 'workout_type'
        ]

        # convert `activities` into a df with `fields` as columns
        df = pd.DataFrame([[getattr(i, j) for j in fields]
                           for i in activities],
                          columns=fields)

        # rename since `id` will be an internal DB `id`
        df.rename(columns={'id': 'strava_id'}, inplace=True)

        # pandas or SQL does not support units, so strip the units from the fields
        # could be worth adding units when the tables are imported
        # I want GIS support, so can't really move to JSONB or BSON
        cols_to_strip = ('average_speed', 'max_speed', 'distance',
                         'total_elevation_gain')
        df = self.strip_units(df, cols_to_strip)

        # pd.to_sql does not yet support TZ, so strip it. for now.
        # also does not suport timedelta ... maybe I have to ditch `pd.to_sql`
        # https://github.com/pandas-dev/pandas/issues/9086
        df.start_date = pd.DatetimeIndex(df.start_date).tz_convert(None)
        df.timezone = df.timezone.astype(str)
        return df