from numpy import where, mean, std, c_, array
from pandas.core.frame import DataFrame
from mlfwk.utils import split_random, get_project_root
from collections import Counter
from mlfwk.models import knn
from mlfwk.metrics import metric
from mlfwk.visualization import generate_space, coloring
from matplotlib.colors import ListedColormap
from mlfwk.readWrite import load_base
from mlfwk.utils import normalization

# Script entry point: loads the 3-class column dataset, min-max normalizes its
# six feature columns, then drops four of them.
if __name__ == '__main__':
    print("run coluna 3 classes")

    # Load the dataset.
    base = load_base(path='column_3C_weka.arff', type='arff')

    # Feature columns.
    features = [
        'pelvic_incidence',
        'pelvic_tilt',
        'lumbar_lordosis_angle',
        'sacral_slope',
        'pelvic_radius',
        'degree_spondylolisthesis',
    ]

    # info() writes its report itself and returns None, so it is called bare;
    # wrapping it in print() only added a stray "None" line.
    # (Assumes `base` is a pandas DataFrame, as the column indexing and
    # .drop(axis=1) below suggest.)
    base.info()

    # ----------------------------- Clean the data -----------------------------

    # ----------------------------- Normalization ------------------------------
    # Normalize the feature columns (min-max).
    base[features] = normalization(base[features], type='min-max')

    # NOTE(review): `features` still lists the four columns dropped here, so
    # later code must not index `base` with it unchanged - confirm downstream.
    base = base.drop(
        ['pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle', 'sacral_slope'],
        axis=1,
    )
'best_cf': [], 'alphas': [] } results = { 'realization': [], 'ACCURACY': [], # 'MCC': [], 'f1_score': [], 'precision': [], 'recall': [], 'cf': [], 'alphas': [] } # carregar a base base = load_base(path='iris.data', type='csv') # normalizar a base base[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']] = normalization( base[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']], type='min-max') N, M = base.shape C = len(base['Species'].unique()) y_out_of_c = pd.get_dummies(base['Species']) base = base.drop(['Species'], axis=1) base = concatenate([base[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']], y_out_of_c], axis=1) for realization in range(20): train, test = split_random(base, train_percentage=.8)
from pandas import DataFrame
from matplotlib.colors import ListedColormap
from numpy import where, append, ones, array, zeros, mean, argmax, linspace, concatenate, c_, std
from mlfwk.metrics import metric
from mlfwk.readWrite import load_base
from mlfwk.utils import split_random, get_project_root, normalization, out_of_c_to_label
from mlfwk.models import MultiLayerPerceptron
from mlfwk.visualization import generate_space, coloring

# Script entry point: loads measures_v2.csv, keeps its first 100000 rows
# without the 'profile_id' column, and names the feature and target columns.
if __name__ == '__main__':
    print("run emt")

    # ----------------------------- Read dataset -------------------------------
    df = load_base('measures_v2.csv', type='csv')
    df = df.drop(['profile_id'], axis=1)
    df = df.iloc[:100000]  # keep only the first 100000 rows

    nRow, nCol = df.shape
    print(f'There are {nRow} rows and {nCol} columns')
    df.info()

    features = [
        'u_q',
        'coolant',
        'u_d',
        'motor_speed',
        'i_d',
        'i_q',
        'ambient',
        'torque',
    ]  # 'profile_id'
    targets = ['stator_yoke', 'pm', 'stator_winding', 'stator_tooth']

    # ----------------------------- Realizations -------------------------------
'ACCURACY': [], # 'MCC': [], 'f1_score': [], 'precision': [], 'recall': [], # 'cf': [], 'alphas': [] } # carregar a base columns = [] for i in range(34): columns.append('x' + str(i)) columns.append('y') base = load_base(path='dermatology.data', column_names=columns, type='csv') # features features = columns[:len(columns) - 1] print(base.info()) # ----------------------------- Clean the data ---------------------------------------------------------------- # The Age has values ? for unique_value in base['x33'].unique(): if unique_value != '?': base['x33'][base['x33'] == unique_value] = int(unique_value) # ? -> mean of column base['x33'][base['x33'] == '?'] = int( np.mean(base['x33'][base['x33'] != '?']))
'R2': [], 'std R2': [], 'alphas': [] } results = { 'realization': [], 'MSE': [], 'RMSE': [], 'R2': [], 'alphas': [] } # --------------------------- Read dataset ---------------------------------------- df = load_base('abalone.csv', type='csv') # The age of abalone is 1.5 + the rings df['age'] = df.Rings + 1.5 # so after the calculate the age, drop the Rings column df.drop('Rings', axis=1, inplace=True) # Label enconding of sex feature df.Sex = df.Sex.replace({"M": 1, "I": 0, "F": -1}) df.info() features = [ 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Sex'
'alphas': [] } results = { 'realization': [], 'ACCURACY': [], # 'MCC': [], 'f1_score': [], 'precision': [], 'recall': [], # 'cf': [], 'alphas': [] } # carregar a base base = load_base(path='breast-cancer-wisconsin.data', type='csv') base = base.drop(['Sample code number'], axis=1) # features features = [ 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape', 'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin', 'Normal Nucleoli', 'Mitoses' ] print(base.info()) # ----------------------------- Clean the data ---------------------------------------------------------------- # The values at the column Bare Nuclei are all strings so we have to transform to int each of them.
from mlfwk.visualization import generate_space, coloring if __name__ == '__main__': print("run car fuel") final_result = { 'MSE': [], 'std MSE': [], 'RMSE': [], 'std RMSE': [], 'R2': [], 'std R2': [] } results = {'realization': [], 'MSE': [], 'RMSE': [], 'R2': []} df = load_base('measurements.csv', type='csv') # ---------------------------------- cleaning data base -------------------------------------- # NaN Columns new_df = df.drop(columns=['refill liters', 'refill gas', 'specials']) # specials_dummies = pd.get_dummies(new_df['specials']) # change E10 and SP98, for numerical new_df['gas_type'][new_df['gas_type'] == 'E10'] = int(0) new_df['gas_type'][new_df['gas_type'] == 'SP98'] = int(1) new_df['gas_type'] = new_df['gas_type'].astype('int') target = ['consume'] features = [