def get_should_long(prices: np.ndarray) -> Tuple[float, float]:
    """Compute two long-entry signals from a price series.

    Parameters
    ----------
    prices : np.ndarray
        1-D array of prices, oldest first.  Must contain at least
        ``slow`` (20) observations so the rolling statistics exist.

    Returns
    -------
    Tuple[float, float]
        * (fast MA - slow MA) / last price — momentum of the fast moving
          average relative to the slow one, normalised by price.
        * last price - upper Bollinger band (slow MA + 2 * rolling std);
          positive means the price closed above the band.
    """
    # Windows for the moving average and moving standard deviation.
    fast = 9   # fast moving-average window
    slow = 20  # slow moving-average / Bollinger window

    # Only the most recent value of each rolling statistic is used, so
    # compute it directly on the trailing window.  (The original built
    # full rolling series with the nonexistent ``np.Series`` and indexed
    # them with ``[-1]``, which raises KeyError on a pandas integer index.)
    moving_avg_fast = prices[-fast:].mean()
    moving_avg_slow = prices[-slow:].mean()
    # ddof=1 matches pandas' rolling().std() (sample standard deviation).
    moving_std = prices[-slow:].std(ddof=1)

    bollinger_high = moving_avg_slow + 2 * moving_std

    return (
        (moving_avg_fast - moving_avg_slow) / prices[-1],
        prices[-1] - bollinger_high,
    )
def beauty_coefficient(c):
    """
    Calculate the sleeping beauty coefficient and awakening time for a
    publication.  See :cite:`ke2015beauty` for details.

    Parameters
    ----------
    c : numpy array
        The yearly citation counts for the publication.  Not modified.

    Returns
    ----------
    pandas Series of two elements:
        B : float
            Sleeping Beauty Coefficient
        t_a : int
            The awakening time
    """
    import pandas as pd  # local import keeps the function self-contained

    # Work on a float copy: the original aliased ``B_denom = c`` and
    # mutated the caller's array in place when zeroing the denominator.
    c = np.asarray(c, dtype=float).copy()
    t_m = np.argmax(c)  # year of peak citations

    # Peak in the first year: no pre-peak span, so B and t_a are 0 by
    # definition (also avoids division by t_m == 0 below).
    if t_m == 0:
        return pd.Series([0.0, 0])

    # Denominator of eq 1/2: use max(1, c_t) so zero-citation years do
    # not divide by zero.
    B_denom = c.copy()
    B_denom[c == 0] = 1

    # :cite:`ke2015beauty` eq 1/2 — deviation of each year's count from
    # the straight line joining (0, c_0) and (t_m, c_{t_m}), normalised.
    l_t = ((c[t_m] - c[0]) / t_m * np.arange(c.shape[0]) + c[0] - c) / B_denom

    # :cite:`ke2015beauty` eq 2 — sum over the pre-peak years.
    B = l_t[:(t_m + 1)].sum()

    # :cite:`ke2015beauty` eq 3 — perpendicular distance to the same
    # line; the awakening time is where it peaks before t_m.
    d_denom = np.sqrt((c[t_m] - c[0]) ** 2 + t_m ** 2)
    d_t = np.abs((c[t_m] - c[0]) * np.arange(c.shape[0]) + t_m * (c[0] - c)) / d_denom
    t_a = np.argmax(d_t[:(t_m + 1)])

    return pd.Series([B, t_a])
# --- dtype inspection / casting (``pse`` defined in an earlier cell) ---
pse.dtype             # read the data type
pse.astype(np.float64)  # returns a cast copy; pse itself keeps its dtype
# arithmetic on a Series behaves like the underlying ndarray

d = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
# print(pd.Series(d))

# --- index-based data selection ---
print(pd.Series(d, index=list('abcde')))   # selection in pandas is index driven
print(pd.Series(d, index=list('edcba')))   # reversed index order
print(pd.Series(d, index=list('abcabc')))  # duplicate index labels are allowed
print(pd.Series(d, index=list('ABCde')))   # unmatched labels auto-fill with NaN

# --- appending: Series.append() ---
pse = pd.Series(data=[18, 30, 25, 40], index=list('abcd'),
                name="user_age_info", dtype=float)
# ignore_index=True: keep numbering on from the original index;
# False: the appended rows restart their own index from 0.
# (Was ``np.Series`` — no such name.  NOTE: Series.append was removed in
# pandas 2.0; use pd.concat there.)
pse.append(pd.Series([1, 2]), ignore_index=True)

# --- removing ---
se = pd.Series(data=[18, 30, 25, 40], index=list('abcd'),
               name="user_age_info", dtype=float)
pse.drop('a')   # returns a modified copy; the source object is untouched
del pse['a']    # deletes from the source object
# The ``inplace=True`` parameter makes drop() modify the source object.

pse.drop_duplicates()  # de-duplicate; result keeps the index labels
pse.unique()           # de-duplicate; returns a plain array
pse.nunique()          # count of distinct values, same as
                       # pse.drop_duplicates().count()

# --- membership tests against the *values* of a Series ---
18 in pse.values
# np.isin(a, b) tests element-wise whether one sequence's values appear
# in another.
# --- Filling in missing data (``df`` and ``NA`` from an earlier cell) ---
df
df.fillna(0)

# Calling fillna with a dict uses a different fill value for each column.
df
df.fillna({1: 0.5, 3: -1})

# inplace=True modifies the existing object instead of returning a new
# one (fillna always returns a reference to the filled object).
_ = df.fillna(0, inplace=True)
df

# Forward fill.
df = pd.DataFrame(np.random.randn(6, 3))
df.loc[2:, 1] = NA
df.loc[4:, 2] = NA
df
df.fillna(method='ffill')
df.fillna(method='ffill', limit=2)

# Fill with a summary statistic such as the mean or median.
data = pd.Series([1., NA, 3.5, NA, 7])  # was ``np.Series`` — no such name
data.fillna(data.mean())
# Keep only matches that have advanced statistics.  ``== True`` is kept
# deliberately: it maps NaN entries in adv_stats to False, whereas a bare
# boolean mask would fail on NaN.
ginf_df = ginf_df[ginf_df['adv_stats'] == True]
ginf_df = ginf_df[['ht', 'at', 'fthg', 'ftag']]  # Choose only necessary columns
ginf_df.head(5)


def get_winner(sr):
    """Return 'home', 'away' or 'tie' from a row's full-time goal counts."""
    if sr.fthg > sr.ftag:
        return 'home'
    if sr.fthg < sr.ftag:
        return 'away'
    return 'tie'


# Add winner column.  Row-wise apply produces the same values, in the
# same row order, as the original explicit iterrows() accumulation loop.
ginf_df = ginf_df.assign(winner=ginf_df.apply(get_winner, axis=1).values)

# Combine into text dataset
df = events_df.join(ginf_df)
df.head(3)
print(events_df.shape)
print(ginf_df.shape)
print(df.shape)

# Write to file with only necessary columns
df.to_csv(
    "drive/Team Drives/Deep Learning Project/ken_cnn/text_dataset_60min.csv",
    sep='\t')
import numpy as np
import pandas as pd

# ``np.arrange`` was a typo for ``np.arange``.
s = pd.Series(np.arange(100, 105), ['a', 'b', 'c', 'd', 'e'])

# The original dropped label 'k', which is not in the index and raises
# KeyError; demonstrate on an existing label instead.
s.drop('e')                # returns a modified copy; ``s`` is unchanged
s
s.drop('e', inplace=True)  # applied directly to ``s``

s[['a', 'b']] = [300, 900]

s1 = pd.Series(np.arange(100, 105))  # was ``np.Series`` with unbalanced parens
s1[1:3]      # positional slice: end exclusive
s['c':'d']   # label slice: the end label is *included*
# Record "did not occur" (0) for this row in each per-event accumulator
# list.  (attempted_shot and own_goal are filled elsewhere and are only
# written out as columns below.)
for accumulator in (
        corner_kick, isFoul, isYellowCard1, isYellowCard2,
        straight_red_card, substitution, free_kick_awarded, off_sides,
        is_hand_ball, penalty_awarded, key_pass, failed_through_ball,
        sending_off):
    accumulator.append(0)

# Create new columns for the 15 additional events, data-driven instead of
# one assignment statement per column.
_event_columns = (
    ("Attempted_Shot", attempted_shot),
    ("Corner_Kick", corner_kick),
    ("Foul", isFoul),
    ("First_Yellow_Card", isYellowCard1),
    ("Second_Yellow_Card", isYellowCard2),
    ("Straight_Red_Card", straight_red_card),
    ("Substitution", substitution),
    ("Free_Kick_Awarded", free_kick_awarded),
    ("Off_Sides", off_sides),
    ("Hand_Ball", is_hand_ball),
    ("Penalty_Awarded", penalty_awarded),
    ("Key_Pass", key_pass),
    ("Failed_Through_Ball", failed_through_ball),
    ("Sending_Off", sending_off),
    ("Own_Goal", own_goal),
)
for _column_name, _values in _event_columns:
    df[_column_name] = pd.Series(_values)