) # Dimensionality reduction using MCA: # Applies only for categoric columns cols_by_type = sysarmy_analysis.group_cols_by_type() cols_categoric = sysarmy_analysis.get_cols_by_type(cols_by_type, ["object"]) sysarmy_analysis.reduction_dims( cols_categoric, method="mca", final_number_dims=2, visualize=True ) sysarmy_analysis.clusterization( cols_to_standard, method="dbscan", visualize=True ) sysarmy_analysis.dummy_cols_from_text(col="tecnologies", sep=",", n_cols=15) print(sysarmy_analysis) # Salary prediction with linear regression with cleaned columns, no dim reduction sysarmy_analysis.linear_regression( col_to_predict="sueldo_mensual_bruto_ars", cols_to_remove=["PC1", "PC2", "MC1", "MC2"], graph=True, )
# ---------------------------------------------------------------------------
# Data cleaning: handle missing values on every column ('remove' method) and
# outliers on the numeric columns ('drop_iqr' method).
# ---------------------------------------------------------------------------
stackoverflow_analysis.replace_missing(
    all_cols,
    method='remove',
)
stackoverflow_analysis.replace_outliers(
    cols_numeric,
    method='drop_iqr',
)
# Disabled: descriptive statistics with graphs.
# stackoverflow_analysis.describe(graph=True)

# ---------------------------------------------------------------------------
# Data processing
# ---------------------------------------------------------------------------
# Only the numeric columns are standardized (z-score).
all_cols_to_standard = cols_numeric
stackoverflow_analysis.standardize(all_cols_to_standard, 'z_score')

# Dimensionality reduction with PCA.
# PCA applies to numeric columns only and requires standardized values,
# which is why it runs after standardize() on the same column list.
stackoverflow_analysis.reduction_dims(
    all_cols_to_standard,
    method='pca',
    final_number_dims=2,
    visualize=True,
)

# DBSCAN clustering over the same standardized numeric columns.
stackoverflow_analysis.clusterization(
    all_cols_to_standard,
    method='dbscan',
    visualize=True,
)

# Disabled: dummy columns from the 'technologies' text column.
# stackoverflow_analysis.dummy_cols_from_text(col='technologies', sep=',', n_cols=15)
# print(stackoverflow_analysis)

# ---------------------------------------------------------------------------
# Disabled: reset the analysis and export the result to CSV.
# ---------------------------------------------------------------------------
# stackoverflow_analysis.reset()
# print(stackoverflow_analysis)
# stackoverflow_analysis.save(output_path / 'stackoverflow_survey_analysed.csv')
# print(stackoverflow_analysis)