X = pd.read_csv('../../data/simulated/mvnsim/mvnsim' + dataset + '.csv', sep=',', header=0, index_col=0) y = np.load('../../data/simulated/mvnsim/target' + dataset + '.npy') #print(y) #print(y.shape) #print(X.shape) distribution_boxplot( X, y, "Initial category 1 distribution of dataset %s" % dataset, "Initial category 0 distribution of distribution %s" % dataset, #output='show' #output='plotly', #ply_title="Initial distribution of dataset %s" % dataset, output='save', path='%sinitialdist_%s.png' % (filepath, nowtime)) print('\nBoxplot of initial data for dataset %s saved.' % dataset) ## PREPROCESSING ## #Scale initial data to centre data X_scaled = scale(X) X_scaled_df = pd.DataFrame.from_records(X_scaled) distribution_boxplot(
#path='%sinitialdist.png' % filepath, ) print('\nBoxplot of initial data for dataset %s saved.' % dataset) ''' ## PREPROCESSING ## #Scale initial data to centre data X_scaled = scale(X) X_scaled_df = pd.DataFrame.from_records(X_scaled) distribution_boxplot(X_scaled_df, y, "Scaled category 1 distribution of dataset %s" % dataset, "Scaled category 0 distribution of dataset %s" % dataset, #output='show' output='save', path='../../figs/out/%s/%s/scaledist.png' % (scriptname, dataset) ) #print(X_scaled.shape) print('\nBoxplot of scaled data for dataset %s saved.' % dataset) #Initiate KPCAwith various kernels # As I'm using 500 variables, 0.002 is the default gamma (1/n_variables) # I only explicitly state it at this point so I can display it on graphs gamma = 0.002 #compute kernels not preloaded into kpca #laplacian K_lap = laplacian_kernel(X_scaled, gamma=gamma)
# Import toy data and target: build both file locations from the dataset
# identifier, then read the feature table and the target array.
features_csv = '../../data/simulated/mvnsim/mvnsim' + dataset + '.csv'
target_npy = '../../data/simulated/mvnsim/target' + dataset + '.npy'
X = pd.read_csv(features_csv, sep=',', header=0, index_col=0)
y = np.load(target_npy)

# Titles for the two categories, in the order the helper receives them.
initial_titles = (
    "Initial category 1 distribution of dataset %s" % dataset,
    "Initial category 0 distribution of dataset %s" % dataset,
)

# Save a boxplot of the data exactly as loaded.
# Other output modes used previously: output='show', or output='plotly'
# together with ply_title="Initial distribution of dataset %s" % dataset.
distribution_boxplot(
    X,
    y,
    *initial_titles,
    output='save',
    path='%sinitialdist.png' % filepath,
)
print('\nBoxplot of initial data for dataset %s saved.' % dataset)

## PREPROCESSING ##

# Scale initial data to centre data, then wrap the result in a DataFrame.
X_scaled = scale(X)
X_scaled_df = pd.DataFrame.from_records(X_scaled)
#X2, y2 = target_split(inp_csv, 500)
# NOTE(review): X2 and y2 are not assigned in this section (the call above is
# commented out); they presumably already exist at this point - confirm.

# Report the shapes of the feature table and of the target array.
print('\nShape of mvn dataframe: %s\n' % (X2.shape, ))
print('\nShape of make_classification target array: %s\n' % (y2.shape, ))

# Target value -> plot title, in the order the panels are drawn.
category_titles = {
    1: "Category A distribution of dataset 015",
    0: "Category B distribution of dataset 015",
}

# Hand-drawn reference figure: one boxplot panel per target value, stacked
# vertically.
plt.figure(figsize=(50, 15))
panels = []
for row, (label, title) in enumerate(category_titles.items(), start=1):
    plt.subplot(2, 1, row)
    panels.append(sns.boxplot(data=inp_csv[inp_target == label]))
    plt.title(title, fontsize=20)
img1, img2 = panels
# To write the figure to disk instead of displaying it:
#plt.savefig('../../data/simulated/mvnsim/mvnsim%sdist.png' % simname)
plt.show()
plt.close()

# Same data and titles passed to the project's plotting helper.
distribution_boxplot(
    inp_csv,
    inp_target,
    category_titles[1],
    category_titles[0],
    output='show',
)