def evolution_drift_with_seq(df_gas, n_batch_training):
    ## Check the drift importance. Use the first N batches to train, and check the
    ## classification results on the others.
    # Train
    evo_train(df_gas, n_batch_training)

    # Test and save results in a dict
    model_dict = {}
    for batch in range(n_batch_training + 1, 11):
        print(f'\n\n---------- Batch {batch} ----------\n\n')
        seq = SeqModel()
        model_name = f'temp_training_{n_batch_training}_test{batch}'
        seq.load_model(model_name)
        ev = Evolution_drift(df_gas, n_batch_training, batch)
        _, X_test, _, y_test = ev.split_data()
        loss, acc = seq.model_evaluate(X_test, y_test)
        model_dict[batch] = {'acc': acc, 'loss': loss}

    # Plot results
    df_results = pd.DataFrame.from_dict(model_dict).T
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
    fig.suptitle(f'Training with first {n_batch_training} batches')
    ax1 = df_results.plot(kind='bar', y='acc', ax=ax1)
    ax1.set_ylim([0, 1])
    ax2 = df_results.plot(kind='bar', y='loss', ax=ax2)
    save_figure(fig, f'Step1_NBATCH_{n_batch_training}_acc_loss')
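# Usage sketch (an assumption, not part of the original script): drive the drift study for a
# few training-batch counts. It assumes load_data() returns the gas dataframe used elsewhere
# in this repo; the batch counts below are purely illustrative.
def run_drift_sweep():
    df_gas = load_data()
    for n_batch_training in [3, 5, 7]:
        evolution_drift_with_seq(df_gas, n_batch_training)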
def apply_tsne(X, y, name):
    print('tsne2d')
    X_embedded = TSNE(n_components=2).fit_transform(X)
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_title('TSNE 2d Batch1, Sensor1, Concentration less than 100 ppmv')
    scatter = ax.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, label=y.unique())
    legend1 = ax.legend(*scatter.legend_elements(), loc="upper right", title="Gas")
    plt.show()
    save_figure(fig, f'Step0_3_TSNE_2d_{name}')

    print('tsne3d')
    X_embedded = TSNE(n_components=3).fit_transform(X)
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_axes([0, 0, .95, 1], projection='3d')
    ax.view_init(elev=48, azim=134)
    scatter = ax.scatter(X_embedded[:, 0], X_embedded[:, 1], X_embedded[:, 2], c=y)
    ax.set_title('TSNE 3d Batch1, Sensor1, Concentration less than 100 ppmv')
    legend1 = ax.legend(*scatter.legend_elements(), loc="upper right", title="Gas")
    plt.show()
    save_figure(fig, f'Step0_3_TSNE_3d_{name}')
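# Usage sketch (an assumption, not in the original script): the plot titles suggest t-SNE was
# run on batch 1, the first sensor (first 8 feature columns) and concentrations below
# 100 ppmv. One plausible call, reusing column names seen elsewhere in this repo:
def run_tsne_batch1():
    df_gas = load_data()
    df_b1 = df_gas[(df_gas['Batch ID'] == 1) & (df_gas['CONCENTRATION'] < 100)]
    apply_tsne(df_b1.iloc[:, :8], df_b1['GAS'], 'batch1_sensor1')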
def apply_KMeans_3d(X, y, name):
    # Project the data onto 3 principal components (used for plotting only)
    pca = PCA(n_components=3)
    pca.fit(X, y)
    xp = pca.transform(X)

    number_of_clusters = 6
    km = KMeans(n_clusters=number_of_clusters)
    # KMeans is fit on the full feature matrix, not on the PCA projection
    y_pred = km.fit_predict(X)
    # Shift the cluster labels so they use the same 1-6 range as the gas indices in the dataframe
    y_pred = y_pred + 1

    fig = plt.figure(figsize=(8, 6))
    plt.clf()
    ax = fig.add_axes([0, 0, .95, 1], projection='3d')
    ax.view_init(elev=48, azim=134)
    scatter = ax.scatter(xp[:, 0], xp[:, 1], xp[:, 2], c=y_pred)
    legend1 = ax.legend(*scatter.legend_elements(), loc="upper right", title="Clusters")
    plt.show()
    save_figure(fig, f'Step0_3_Color for each cluster_3d_{name}')

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_axes([0, 0, .95, 1], projection='3d')
    ax.view_init(elev=48, azim=134)
    scatter = ax.scatter(xp[:, 0], xp[:, 1], xp[:, 2], c=y)
    legend1 = ax.legend(*scatter.legend_elements(), loc="upper right", title="Gas")
    plt.show()
    save_figure(fig, f'Step0_3_Color for each gas_3d_{name}')

    fig = plt.figure(3, figsize=(8, 6))
    conf = confusion_matrix(y, y_pred)
    sns.heatmap(conf, annot=True, fmt='d')
    plt.title(f"Confusion matrix_{name}")
    plt.show()
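# Note (an addition, not from the original script): KMeans cluster IDs are arbitrary, so simply
# adding 1 does not align them with the gas labels, and the confusion matrix above can look
# scrambled even for a good clustering. Below is a minimal sketch of one way to align them with
# the Hungarian algorithm from SciPy; it assumes every cluster id also occurs as a gas label (1-6).
def align_cluster_labels(y_true, y_cluster):
    import numpy as np
    from scipy.optimize import linear_sum_assignment
    labels = np.unique(y_true)
    conf = confusion_matrix(y_true, y_cluster, labels=labels)
    # Maximise the diagonal overlap by minimising the negated counts
    row_ind, col_ind = linear_sum_assignment(-conf)
    mapping = {labels[c]: labels[r] for r, c in zip(row_ind, col_ind)}
    return np.array([mapping[c] for c in y_cluster])

# Possible usage inside apply_KMeans_3d, just before building the confusion matrix:
#     y_pred = align_cluster_labels(y, y_pred)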
    df_conf = pd.DataFrame(data=confusion, columns=label_gas, index=label_gas)
    sns.heatmap(df_conf, annot=True, fmt="d", cmap='Blues')
    plt.title('Confusion Matrix')
    plt.yticks(rotation=0)
    plt.show()
    return f


if __name__ == '__main__':
    # Check the results for the sequential neural net
    # Load data
    df_gas = load_data()

    mod1 = SeqModelSimple(df_gas)
    X_train, X_test, y_train, y_test = mod1.split_data()
    # mod1.train_and_save_model('ModelSimple')

    mod2 = SeqModelWithConcentration(df_gas)
    X_train2, X_test2, y_train2, y_test2 = mod2.split_data()
    # mod2.train_and_save_model('ModelWithConcentration')

    seq = SeqModel()
    seq.load_model('ModelSimple')
    f = plot_conf(seq.get_model(), X_test, y_test)
    save_figure(f, 'ConfMatrix_ModelSimple')

    seq = SeqModel()
    seq.load_model('ModelWithConcentration')
    f = plot_conf(seq.get_model(), X_test2, y_test2)
    save_figure(f, 'ConfMatrix_ModelWithConcentration')
df_temp = pd.DataFrame()
for g, c in dict_gas_concentration.items():
    df_select = df[(df['GAS'] == g) & (df['CONCENTRATION'] == c)]
    df_temp = pd.concat([df_temp, df_select])

# Select only one sensor
X = df_temp.iloc[:, :8]
y = df_temp['GAS']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
fig = lgbm_conf_shap(X_train, X_test, y_train, y_test)
save_figure(fig, 'Step4_LGBM_one_sensor')

### Now let's use all sensor data
# Select all sensors
X = df_temp.iloc[:, :128]
y = df_temp['GAS']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
fig = lgbm_conf_shap(X_train, X_test, y_train, y_test)

### Okay, now let's train on batches 1 to 8 and predict batch 9
df_temp_train = df_temp[df_temp['Batch ID'].isin([1, 2, 3, 4, 5, 6, 7, 8])]
df_temp_test = df_temp[df_temp['Batch ID'].isin([9])]
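# The real lgbm_conf_shap helper is defined elsewhere in this repo. For reference, a minimal
# sketch (an assumption, not the author's implementation) of what such a helper typically does:
# train a LightGBM classifier, plot its confusion matrix, and show a SHAP feature summary.
def lgbm_conf_shap_sketch(X_train, X_test, y_train, y_test):
    import lightgbm as lgb
    import shap
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.metrics import confusion_matrix

    clf = lgb.LGBMClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Confusion matrix of the hold-out predictions
    fig = plt.figure(figsize=(8, 6))
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues')
    plt.title('LGBM confusion matrix')
    plt.show()

    # SHAP values for the tree model; for multiclass this returns one array per class
    explainer = shap.TreeExplainer(clf)
    shap_values = explainer.shap_values(X_test)
    shap.summary_plot(shap_values, X_test, show=False)
    plt.show()
    return fig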
print('\n', pivot.round(2).to_markdown())

# Find which concentration is most common for each gas
df_c = df_gas[['GAS', 'CONCENTRATION']].value_counts()
df_c1 = df_c.reset_index()
df_c1 = df_c1.rename(columns={0: 'count'})
idx = df_c1.groupby(['GAS'])['count'].transform(max) == df_c1['count']
result = df_c1[idx].sort_values(by='GAS')
print(result)

## Plots
# Show sample count per GAS
fig = plot_count_per_batch_and_gas(df_gas)
save_figure(fig, 'Step0_Count_Batch_Gas')
plt.show()

fig = plot_sample_count_per_gas(df_gas)
save_figure(fig, 'Step0_Count_Gas')
plt.show()

## Concentration plot (takes time to plot)
fig, axes = plt.subplots(3, 2, figsize=(25, 20))
fig.suptitle('Count of measurements of each gas and concentration')
for i, ax in enumerate(axes.flatten(), start=1):
    print(i)
    concentration_plot_count(ax, df_gas, gas=i)
plt.tight_layout()
plt.show()
save_figure(fig, 'Step0_Concentration Distribution per gas')
from python.StandardFigure import save_figure
from python.LoadSensorData import get_sensors_list

if __name__ == '__main__':
    # Load data
    df = load_data()

    # Correlation between all 128 features
    corr = df.iloc[:, :128].corr()
    fig = plt.figure(figsize=(20, 20))
    sns.heatmap(corr, vmin=-1, vmax=1, cmap='coolwarm', square=True)
    plt.title('Correlation Between Features')
    plt.show()
    save_figure(fig, 'Step0_1_1_CorrelationBetweenFeatures')

    # Get a dataframe with only one sensor of each Type I, II, III and IV
    df_sens = get_sensors_list([0, 2, 4, 6])
    sensors_features = df_sens.drop(['Batch ID', 'GAS', 'CONCENTRATION'], axis=1)
    fig = plt.figure(figsize=(20, 20))
    ax = sns.heatmap(sensors_features.corr(), vmin=-1, vmax=1, annot=True, cmap='coolwarm')
    ax.set_title('Correlation between SensorPack1')
    plt.show()
    save_figure(fig, 'Step0_1_1_CorrelationBetweenFeatures_Data1')

    # Another combination of 4 sensors
    df_sens = get_sensors_list([1, 3, 5, 7])
    sensors_features = df_sens.drop(['Batch ID', 'GAS', 'CONCENTRATION'], axis=1)
    fig = plt.figure(figsize=(20, 20))
# Train a neural net for each individual sensor
create_model_sensors()

# Load models
modP1, confP1 = load_and_test_model_pack('Seq_SensorP1', dict_sensors['SensorP1'])
modP2, confP2 = load_and_test_model_pack('Seq_SensorP2', dict_sensors['SensorP2'])
modP3, confP3 = load_and_test_model_pack('Seq_SensorP3', dict_sensors['SensorP3'])
modP4, confP4 = load_and_test_model_pack('Seq_SensorP4', dict_sensors['SensorP4'])
modA, confA = load_and_test_model_pack('Seq_TypeA', dict_sensors['TypeA'])
modB, confB = load_and_test_model_pack('Seq_TypeB', dict_sensors['TypeB'])
modC, confC = load_and_test_model_pack('Seq_TypeC', dict_sensors['TypeC'])
modD, confD = load_and_test_model_pack('Seq_TypeD', dict_sensors['TypeD'])

# The four sensor-pack models reach the same accuracy, but what about their confusion matrices?
for name, conf in [('Seq_SensorP1', confP1), ('Seq_SensorP2', confP2),
                   ('Seq_SensorP3', confP3), ('Seq_SensorP4', confP4)]:
    fig = plt.figure()
    ax = sns.heatmap(conf, annot=True)
    plt.title(name)
    plt.show()
    save_figure(fig, f'Conf_{name}')

# Let's see the result for each sensor type
for name, conf in [('Seq_TypeA', confA), ('Seq_TypeB', confB),
                   ('Seq_TypeC', confC), ('Seq_TypeD', confD)]:
    fig = plt.figure()
    ax = sns.heatmap(conf, annot=True)
    plt.title(name)
    plt.show()
    save_figure(fig, f'Conf_{name}')

# Keep the number of correct predictions (the diagonal)
diagA = np.diag(confA)
diagB = np.diag(confB)
diagC = np.diag(confC)
diagD = np.diag(confD)
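# Sketch (an addition, not from the original script) of one way the per-gas diagonals could be
# compared: a grouped bar chart of correct predictions per gas for each sensor type.
df_diag = pd.DataFrame({'TypeA': diagA, 'TypeB': diagB, 'TypeC': diagC, 'TypeD': diagD})
df_diag.index = df_diag.index + 1  # label rows with gas indices 1-6 instead of 0-5
fig, ax = plt.subplots(figsize=(12, 8))
df_diag.plot(kind='bar', ax=ax)
ax.set_xlabel('Gas')
ax.set_ylabel('Correct predictions')
ax.set_title('Correct predictions per gas for each sensor type')
plt.show()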