''' from bs4 import BeautifulSoup import requests import csv import pandas as pd import numpy as np import matplotlib.pyplot as plt import CrossparserUtils as cpu #cpu.parseSite(); print "Finished writing to the file" cols =['Date','Workout'] df = pd.read_csv('workouts_2005_HERO.tsv', sep='\t', converters={'Date': str}) df = df.dropna() df['Year'], df['Month'], df['Day'] = zip(*df["Date"].map(cpu.splitDate)) print df dfHero = df[df['Workout'].str.contains("HERO:")] print dfHero.reset_index(inplace=True) df_hero_year=dfHero.groupby(['Year']).size() print "Hero WODS per year : " print df_hero_year cpu.writeToFile('output/workouts_hero.tsv',['Year','Count'],'\t',cpu.listFromDf(df_hero_year))
cols =['Date','Workout'] df = pd.read_csv('workouts_2005.tsv', sep='\t', converters={'Date': str}) df = df.dropna() df['Year'], df['Month'], df['Day'] = zip(*df["Date"].map(cpu.splitDate)) print df #Workouts per year df_year_total=df.groupby(['Year']).size() print "Total workouts per year : " print df_year_total cpu.writeToFile('output/workouts_total_2005.tsv',['Year','Count'],'\t',cpu.listFromDf(df_year_total)) #Calculate Rest days per year dfRest = df[df['Workout'].str.contains("Rest Day") | df['Workout'].str.contains("Rest day")] print dfRest.reset_index(inplace=True) df_year_rest=dfRest.groupby(['Year']).size() print "Rest days per Year : " print df_year_rest cpu.writeToFile('output/workouts_rest.tsv',['Year','Count'],'\t',cpu.listFromDf(df_year_rest)) #Calculate for time days per year dfTime = df[df['Workout'].str.contains("For Time:") | df['Workout'].str.contains("For time:")] print dfTime.reset_index(inplace=True) df_year_time=dfTime.groupby(['Year']).size()