def p3():
    """Plot the contents of ``finances.csv`` on a quarterly time axis.

    The CSV is read from the current working directory and its rows are
    relabelled with a quarterly ``PeriodIndex`` whose first period is the
    quarter containing September 1978.  The frame is then drawn with
    pandas' default plot, labelled, and shown.

    Returns:
        None.  The only effects are reading the file and showing a figure.
    """
    # load data
    data = pd.read_csv("finances.csv")

    # quarterly basis: one period per row, so the index length always
    # matches the file instead of relying on a fixed row count
    data.index = pd.period_range("1978-09", periods=len(data), freq="Q")

    # plot data
    data.plot(linewidth=1)
    plt.xlabel("Date")
    plt.ylabel("Earnings and Expenses")
    plt.title("Earnings and Expenses per Quarter")
    plt.show()
def p1():
    """Plot the ``VALUE`` column of ``DJIA.csv`` against its dates.

    The ``DATE`` column (``YYYY-MM-DD``) becomes a datetime index.  Rows
    whose ``VALUE`` is missing or is the placeholder string ``"."`` are
    discarded, the remaining values are converted to floats, and the
    series is drawn as a thin line.

    Returns:
        None.  The only effects are reading the file and showing a figure.
    """
    # load data
    data = pd.read_csv("DJIA.csv")

    # set index as datetime
    data.index = pd.to_datetime(data["DATE"], format="%Y-%m-%d")
    df = data.drop(columns=["DATE"])

    # drop empty rows and change to floats; the explicit copy makes the
    # column assignment below operate on an independent frame rather than
    # on a filtered slice (which triggers SettingWithCopyWarning)
    df = df.dropna(subset=["VALUE"])
    df = df[df["VALUE"] != "."].copy()
    df["VALUE"] = df["VALUE"].astype("float")

    plt.plot(df["VALUE"], lw=0.5)
    plt.show()
def p6():
    """Plot ``DJIA.csv`` together with its rolling maximums.

    The data is cleaned the same way throughout this file: ``DATE``
    becomes a datetime index, rows whose ``VALUE`` is missing or ``"."``
    are dropped, and ``VALUE`` is converted to float.  The cleaned data is
    drawn once as-is and once per window size (30, 120 and 365 rows) as
    the rolling maximum over that window.

    Returns:
        None.  The only effects are reading the file and showing a figure.
    """
    # load data
    data = pd.read_csv("DJIA.csv")

    # set index as datetime
    data.index = pd.to_datetime(data["DATE"], format="%Y-%m-%d")
    df = data.drop(columns=["DATE"])

    # drop empty rows and change to floats; the explicit copy makes the
    # column assignment below operate on an independent frame rather than
    # on a filtered slice (which triggers SettingWithCopyWarning)
    df = df.dropna(subset=["VALUE"])
    df = df[df["VALUE"] != "."].copy()
    df["VALUE"] = df["VALUE"].astype("float")

    # plot data
    windows = [30, 120, 365]
    plt.figure(figsize=(10, 8))
    plt.plot(df, alpha=0.5, label='actual')
    for w in windows:
        # the first w - 1 rows of each rolling result are NaN and are
        # simply not drawn
        plt.plot(df.rolling(window=w).max(), alpha=0.5, label=f'window = {w}')
    plt.title('Rolling maximums')
    plt.legend()
    plt.show()
def p5():
    """Report the largest daily and monthly moves found in ``DJIA.csv``.

    ``DATE`` (``YYYY-MM-DD``) becomes a datetime index; rows whose
    ``VALUE`` is missing or is the placeholder ``"."`` are dropped and the
    rest converted to float.  Two change series are then computed:

    * daily: each row's value minus the previous row's value;
    * monthly: the first value of each month minus the first value of the
      previous month, labelled with the later month's month-end date.

    The dates of the largest gain and largest loss in each series are
    printed as a single sentence-style line.

    Returns:
        None.  The result is written to standard output.

    Raises:
        ValueError: if the file holds too little data to form a monthly
            difference (raised by ``idxmax`` on an empty series).
    """
    # load data
    data = pd.read_csv("DJIA.csv")

    # set index as datetime
    data.index = pd.to_datetime(data["DATE"], format="%Y-%m-%d")
    df = data.drop(columns=["DATE"])

    # drop empty rows and change to floats; the explicit copy makes the
    # column assignment below operate on an independent frame rather than
    # on a filtered slice (which triggers SettingWithCopyWarning)
    df = df.dropna(subset=["VALUE"])
    df = df[df["VALUE"] != "."].copy()
    df["VALUE"] = df["VALUE"].astype("float")
    values = df["VALUE"]

    # day-over-day change; idxmax/idxmin skip the leading NaN and avoid
    # sorting the whole series just to read one label
    day_diff = values.diff()
    s_g = day_diff.idxmax()
    s_l = day_diff.idxmin()

    # month-over-month change of each month's first value.  The offset
    # object is used instead of the 'M' string alias, which is deprecated
    # in recent pandas; the month-end bins and labels are the same.
    m_data = values.resample(pd.offsets.MonthEnd()).first()
    m_diff = m_data.diff().dropna()
    m_g = m_diff.idxmax()
    m_l = m_diff.idxmin()

    print(
        f"The single day with the largest gain was {s_g!s}. "
        f"The single day with the largest loss was {s_l!s}. "
        f"The month with the largest gain was {m_g!s}. "
        f"The month with the largest loss was {m_l!s}."
    )
#%%Case 3a: Unbalanced datasets for classification purpose. Following the case 1, here is the equivalent solution: from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3) #%%% import pandas as pd from sklearn.model_selection import train_test_split datafile_name = 'path_to_data_file' data = pd.read_csv(datafile_name) data=df #target_attribute = data['column_name'] target_attribute = data['mpg'] data = data.drop(columns = ['mpg'], axis = 1) X_train, X_test, y_train, y_test = train_test_split(data, target_attribute, test_size=0.2) X_train X_test y_train y_test df.applymap(lambda x: len(str(x))) pd.applymap? pd.applymaplen(X_train,X_test) #%%%To split into more than two classes such as train, test, and validation, one can do: probs = np.random.rand(len(df)) training_mask = probs < 0.7 test_mask = (probs>=0.7) & (probs < 0.85) validation_mask = probs >= 0.85 probs
# Data preparation for the Parkinson's dataset: load the CSV, pick the feature
# and target columns, and make an 80/20 train/test split.
# NOTE(review): `pd` and `train_test_split` are used below but not imported in
# this section; they must already be in scope from earlier in the file.
import numpy as np
import keras

# Column names expected in parkinsons.csv, in the order used to build `df`.
column_names = [
    "subject#", "age", "sex", "test_time", "motor_UPDRS", "total_UPDRS",
    "Jitter(%)", "Jitter(Abs)", "Jitter:RAP", "Jitter:PPQ5", "Jitter:DDP",
    "Shimmer", "Shimmer(dB)", "Shimmer:APQ3", "Shimmer:APQ5", "Shimmer:APQ11",
    "Shimmer:DDA", "NHR", "HNR", "RPDE", "DFA", "PPE"
]

data = pd.read_csv("parkinsons.csv")
# Re-wrap the loaded frame, selecting exactly the columns listed above.
df = pd.DataFrame(data, columns=column_names)

# `data` is rebound here: from this point it is `df` without 'subject#'.
data = df.drop('subject#', axis=1)
data1 = data.drop('age', axis=1)
data2 = data1.drop('sex', axis=1)
# NOTE(review): data3 is derived from `data`, not `data2`, so it still holds
# 'age' and 'sex'; data1 and data2 are not used again in this section. This
# does not affect X or y below because both select their columns by name.
data3 = data.drop('test_time', axis=1)

# Features: motor_UPDRS plus the voice measurements; 'age', 'sex' and the
# target 'total_UPDRS' are left out.
X = data3[[
    "motor_UPDRS", "Jitter(%)", "Jitter(Abs)", "Jitter:RAP", "Jitter:PPQ5",
    "Jitter:DDP", "Shimmer", "Shimmer(dB)", "Shimmer:APQ3", "Shimmer:APQ5",
    "Shimmer:APQ11", "Shimmer:DDA", "NHR", "HNR", "RPDE", "DFA", "PPE"
]]
# Target kept as a one-column DataFrame (double brackets), not a Series.
y = data3[["total_UPDRS"]]

# 20% of the rows are held out for testing; no random_state is given, so the
# split changes between runs.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

def get_model():