# In[5]:

model = ho.pcr.nipalsPCR(
    arrX=X,
    Xstand=False,
    arrY=Y,
    Ystand=False,
    cvType=["loo"],
    numComp=4,
)


# That's it, the PCR model has been computed. Now we would like to inspect the
# results by visualising them. We can do this using plotting functions of the
# separate [**hoggormPlot** package](https://hoggormplot.readthedocs.io/en/latest/).
# If we wish to plot the results for component 1 and component 2, we can do this
# by setting the input argument ``comp=[1, 2]``. The input argument
# ``plots=[1, 2, 3, 4, 6]`` lets the user define which plots are to be plotted.
# If this list for example contains value ``1``, the function will generate the
# scores plot for the model. If the list contains value ``2``, then the loadings
# plot will be plotted. Value ``3`` stands for correlation loadings plot, value
# ``4`` stands for bi-plot and ``6`` stands for explained variance plot. The
# hoggormPlot documentation provides a
# [description of input parameters](https://hoggormplot.readthedocs.io/en/latest/mainPlot.html).

# In[6]:

hop.plot(
    model,
    comp=[1, 2],
    plots=[1, 2, 3, 4, 6],
    objNames=X_objNames,
    XvarNames=X_varNames,
    YvarNames=Y_varNames,
)


# Plots can also be called separately.

# In[7]:

# Plot cumulative explained variance (both calibrated and validated) using a
# specific function for that.
hop.explainedVariance(model)


# In[8]:

# Plot cumulative validated explained variance for each variable in Y
hop.explainedVariance(model, individual=True)
# Get the variable (column) names
data_varNames = list(train_avg_df.columns)

# Get the object (row) names, converted to plain ints
data_objNames = list(map(int, train_avg_df.index))

model = ho.nipalsPCA(arrX=data, Xstand=False, cvType=["loo"], numComp=4)


# In[17]:

# %matplotlib qt5
# For zooming in and out
get_ipython().run_line_magic('matplotlib', 'inline')
hop.plot(
    model,
    comp=[1, 2],
    plots=[1, 2, 3],
    objNames=data_objNames,
    XvarNames=data_varNames,
)


# **Components 2 and 3 also showed considerable variation. The points are more
# densely packed in the direction of 2nd and 3rd components, than the first**

# # Try with PCA and RandomForestRegression

# In[14]:

rf_pipe = make_pipeline(
    # StandardScaler(),
    PCA(n_components=10),
    RandomForestRegressor(n_estimators=100, max_depth=100, n_jobs=-1),
)
# Hoggorm PCA and hoggormPlot:
data_varNames = [
    'air_temp_set_1',
    'altimeter_set_1',
    'dew_point_temperature_set_1d',
    'pressure_set_1d',
    'relative_humidity_set_1',
    'sea_level_pressure_set_1d',
    'wind_speed_set_1',
]
data_objNames = list(data_df['Station_ID_NUMERIC'])

# For PCA one can use: loo = cross validation with leave one out,
# or cvType=["Kfold", 4] = k-fold cross validation.
# The input data are standardised and centred by setting Xstand=True.
# 7 principal components are calculated.
model = ho.nipalsPCA(arrX=model_data, numComp=7, cvType=["loo"], Xstand=True)

# Three figures are produced: 1: scores plot, 2: loadings plot,
# 6: explained variance plot.
# hoggormPlot is used here:
hop.plot(model, plots=[1, 2, 6], XvarNames=data_varNames, objNames=data_objNames)

# scikit-learn PCA:
# The data are standardised using StandardScaler().
# PCA with 7 principal components is used.
pipeline = Pipeline([
    ('Scaling', StandardScaler()),
    ('pca', PCA(n_components=7)),
])
X_reduced = pipeline.fit_transform(model_data)

# Plot the principal components PC1 and PC2.
# Different Station_ID values get different colours.
plt.figure()
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=data_objNames)
plt.xlabel('PC1')
plt.ylabel('PC2')
# ### Apply PCA to our data

# Now, let's run PCA on the data using the ``nipalsPCA`` class. The documentation
# provides a
# [description of the input parameters](https://hoggorm.readthedocs.io/en/latest/pca.html).
# Using input parameter ``arrX`` we define which numpy array we would like to
# analyse. By setting input parameter ``Xstand=False`` we make sure that the
# variables are only mean centered, not scaled to unit variance. This is the
# default setting and actually doesn't need to be expressed explicitly. Setting
# parameter ``cvType=["loo"]`` we make sure that we compute the PCA model using
# full cross validation. ``"loo"`` means "Leave One Out". By setting parameter
# ``numComp=5`` we ask for five principal components (PC) to be computed.

# In[12]:

model = ho.nipalsPCA(arrX=data, Xstand=False, cvType=["loo"], numComp=5)


# That's it, the PCA model has been computed. Now we would like to inspect the
# results by visualising them. We can do this using the tailor-made plotting
# function for PCA from the separate
# [**hoggormPlot** package](https://hoggormplot.readthedocs.io/en/latest/).
# If we wish to plot the results for component 1 and component 2, we can do this
# by setting the input argument ``comp=[1, 2]``. The input argument
# ``plots=[1, 6]`` lets the user define which plots are to be plotted. If this
# list for example contains value ``1``, the function will generate the scores
# plot for the model. If the list contains value ``6``, the function will
# generate an explained variance plot. The hoggormPlot documentation provides a
# [description of input parameters](https://hoggormplot.readthedocs.io/en/latest/mainPlot.html).

# In[15]:

hop.plot(model, comp=[1, 2], plots=[1, 6])


# It is also possible to generate the same plots one by one with specific plot
# functions as shown below.

# In[19]:

hop.loadings(model, line=True)


# ---

# ### Accessing numerical results

# Now that we have visualised the PCA results, we may also want to access the
# numerical results. Below are some examples. For a complete list of accessible
# results, please see the
# [hoggorm PCA documentation](https://hoggorm.readthedocs.io/en/latest/pca.html).
model = ho.nipalsPLS2(
    arrX=X,
    Xstand=False,
    arrY=Y,
    Ystand=False,
    cvType=["loo"],
    numComp=4,
)


# That's it, the PLS2 model has been computed. Now we would like to inspect the
# results by visualising them. We can do this using plotting functions of the
# separate [**hoggormPlot** package](https://hoggormplot.readthedocs.io/en/latest/).
# If we wish to plot the results for component 1 and component 2, we can do this
# by setting the input argument ``comp=[1, 2]``. The input argument
# ``plots=[1, 2, 3, 4, 6]`` lets the user define which plots are to be plotted.
# If this list for example contains value ``1``, the function will generate the
# scores plot for the model. If the list contains value ``2``, then the loadings
# plot will be plotted. Value ``3`` stands for correlation loadings plot, value
# ``4`` stands for bi-plot and ``6`` stands for explained variance plot. The
# hoggormPlot documentation provides a
# [description of input parameters](https://hoggormplot.readthedocs.io/en/latest/mainPlot.html).

# In[6]:

hop.plot(
    model,
    comp=[1, 2],
    plots=[1, 2, 3, 4, 6],
    objNames=X_objNames,
    XvarNames=X_varNames,
    YvarNames=Y_varNames,
)


# Plots can also be called separately.

# In[7]:

# Plot cumulative explained variance (both calibrated and validated) using a
# specific function for that.
hop.explainedVariance(model)


# In[8]:
# Now, let's run PCA on the data using the ``nipalsPCA`` class. The documentation
# provides a
# [description of the input parameters](https://hoggorm.readthedocs.io/en/latest/pca.html).
# Using input parameter ``arrX`` we define which numpy array we would like to
# analyse. By setting input parameter ``Xstand=False`` we make sure that the
# variables are only mean centered, not scaled to unit variance. This is the
# default setting and actually doesn't need to be expressed explicitly. Setting
# parameter ``cvType=["loo"]`` we make sure that we compute the PCA model using
# full cross validation. ``"loo"`` means "Leave One Out". By setting parameter
# ``numComp=4`` we ask for four principal components (PC) to be computed.

# In[7]:

model = ho.nipalsPCA(arrX=data, Xstand=False, cvType=["loo"], numComp=4)


# That's it, the PCA model has been computed. Now we would like to inspect the
# results by visualising them. We can do this using the tailor-made plotting
# function for PCA from the separate
# [**hoggormPlot** package](https://hoggormplot.readthedocs.io/en/latest/).
# If we wish to plot the results for component 1 and component 2, we can do this
# by setting the input argument ``comp=[1, 2]``. The input argument
# ``plots=[1, 2, 3, 4, 6]`` lets the user define which plots are to be plotted.
# If this list for example contains value ``1``, the function will generate the
# scores plot for the model. If the list contains value ``2``, then the loadings
# plot will be plotted. Value ``3`` stands for correlation loadings plot, value
# ``4`` stands for bi-plot and ``6`` stands for explained variance plot. The
# hoggormPlot documentation provides a
# [description of input parameters](https://hoggormplot.readthedocs.io/en/latest/mainPlot.html).

# In[8]:

hop.plot(
    model,
    comp=[1, 2],
    plots=[1, 2, 3, 4, 6],
    objNames=data_objNames,
    XvarNames=data_varNames,
)


# ---

# ### Accessing numerical results

# Now that we have visualised the PCA results, we may also want to access the
# numerical results. Below are some examples. For a complete list of accessible
# results, please see the
# [hoggorm PCA documentation](https://hoggorm.readthedocs.io/en/latest/pca.html).

# In[9]:

# Get scores and store in numpy array
scores = model.X_scores()