Example #1
0
def calculate_scores(data):
    """Compute CAGR and median calendar-year return for a price series.

    Parameters
    ----------
    data : pd.Series
        Prices indexed by a DatetimeIndex (only ``index.year`` is used).

    Returns
    -------
    tuple[float, float]
        ``(cagr, year_median)`` in percent; ``(nan, nan)`` for an empty series.
    """
    if data.empty:  # no observations -> no meaningful score
        return math.nan, math.nan

    # Performance of every point relative to the first observation, in percent.
    data_pct = (data - data.iloc[0]) / data.iloc[0] * 100

    # Per-calendar-year return: (last - first) / first within each year, in percent.
    # NOTE: groupby on the bare index.year (original wrapped it in a one-element
    # list, which is equivalent here but triggers pandas deprecation noise).
    data_year = data.groupby(data.index.year).apply(
        lambda x: (x.iloc[-1] - x.iloc[0]) / x.iloc[0] * 100
    )
    year_median = data_year.median()

    # Compound annual growth rate over the number of distinct calendar years
    # present in the data (partial first/last years count as full years).
    n_years = data_year.index.size
    cagr = ((data_pct.iloc[-1] / 100 + 1) ** (1 / n_years) - 1) * 100

    return cagr, year_median
Example #2
0
#%%
# Aggregate to daily totals, then smooth with a centered 30-day rolling SUM.
# NOTE(review): `data`, `plt`, and `np` are assumed to be defined earlier in
# the file (a pandas object with a DatetimeIndex, matplotlib.pyplot, numpy) —
# confirm against the preceding cells.
daily = data.resample('D').sum()
daily.rolling(30, center = True).sum().plot(style = [':', '--', '-'])
plt.ylabel('mean hourly count')

#%%
# Using a Gaussian window; `std` sets the spread of the Gaussian weights.
daily.rolling(50, center = True,
              win_type = 'gaussian').sum(std = 10).plot()

#%%
# Digging in:
# average traffic as a function of the time of day
by_time = data.groupby(data.index.time).mean()
# One x-tick every 4 hours, expressed in seconds (6 ticks over 24h).
hourly_ticks = 4 * 60 * 60 * np.arange(6)
by_time.plot(xticks = hourly_ticks)

#%% 
# How things change based on the day of the week
by_weekday = data.groupby(data.index.dayofweek).mean()
# dayofweek: 0 = Monday ... 6 = Sunday, so the labels below line up.
by_weekday.index = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
by_weekday.plot()

#%%
# let's do a compound groupby and look at the hourly trend on
# weekdays versus weekends
weekend = np.where(data.index.weekday < 5, 'Weekday', 'Weekend')
by_time = data.groupby([weekend, data.index.time]).mean()
# Persist the frame so later sections can reload it (hard-coded local path).
data.to_csv("/Users/xiangliu/Desktop/CSC560 Data/pollution_AQI.csv",
            index=True,
            sep=',')

# In[6]:

# Reload the CSV written above so the following cells start from a fresh frame.
# NOTE(review): assumes `pd` (pandas) is imported earlier in the file.
data = pd.read_csv('/Users/xiangliu/Desktop/CSC560 Data/pollution_AQI.csv')
data.shape

# In[7]:

data.head(3)

# In[8]:

# Row counts per state.
data.groupby(['State']).count()
# Found that 5 states are absent (Montana, Nebraska, Mississippi,
# West Virginia, Vermont).

# In[9]:

# Inspect rows with missing 'CO AQI' values.
le = data['CO AQI']
le[le.isnull()]

# In[10]:

# Inspect rows with missing 'SO2 AQI' values.
le = data['SO2 AQI']
le[le.isnull()]

# In[11]:

# Inspect missing 'NO2 AQI' values (this cell continues past the visible chunk).
le = data['NO2 AQI']