import numpy as np
import pandas as pd
from scipy.io import loadmat

from preprocessing import getData
from MLP_auto import MLP as MLP_auto
from MLP import MLP

if __name__ == '__main__':
    X, y_cat = getData('data.mat')

    # m = number of feature vectors
    m = X.shape[0]
    # n = number of features
    n = X.shape[1]

    train_percent = 0.6
    X_train = X[:int(train_percent * X.shape[0]), :]
    y_train = y_cat[:int(train_percent * X.shape[0]), :]
    X_test = X[int(train_percent * X.shape[0]):, :]
    y_test = y_cat[int(train_percent * X.shape[0]):, :]

    # hidden layers array
    Layers = [42, 24, 12]
    alpha = 0.5
    max_iter = 30

    # pretraining 3 autoencoders
    model11 = MLP_auto([n, Layers[0]], ['sigmoid'])
    print("pre-training autoencoder 1")
    model11.train(X_train, X_train, alpha, 12, max_iter)
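    # Not the project's MLP_auto code: a minimal, self-contained NumPy sketch of
    # what one autoencoder pretraining pass does (encode, decode, reconstruct the
    # input, take a gradient step). The layer size, learning rate, and synthetic
    # data are assumptions for illustration only; biases are omitted for brevity.
    def _demo_sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    rng_demo = np.random.RandomState(0)
    X_demo = rng_demo.rand(100, 20)            # 100 samples, 20 features
    n_in, n_hid, lr = 20, 8, 0.5
    W_enc = rng_demo.randn(n_in, n_hid) * 0.1
    W_dec = rng_demo.randn(n_hid, n_in) * 0.1
    for _ in range(30):                        # a few reconstruction epochs
        H = _demo_sigmoid(X_demo.dot(W_enc))   # hidden code
        X_hat = _demo_sigmoid(H.dot(W_dec))    # reconstruction of the input
        err = X_hat - X_demo
        d_dec = err * X_hat * (1 - X_hat)      # sigmoid derivative at the output
        d_enc = d_dec.dot(W_dec.T) * H * (1 - H)
        W_dec -= lr * H.T.dot(d_dec) / len(X_demo)
        W_enc -= lr * X_demo.T.dot(d_enc) / len(X_demo)
    print("demo reconstruction MSE:", np.mean((X_hat - X_demo) ** 2))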
import random

import pandas as pd

import preprocessing

cols = ['sentiment', 'id', 'date', 'query_string', 'user', 'text']
loc = "C:/Users/risha/Downloads/NLP Data/"
df = pd.read_csv(loc + "emoticon.csv", header=None, names=cols, encoding="ISO-8859-1")
df = df.drop(columns=['id', 'date', 'query_string', 'user'])

# sample a contiguous block of ~100,000 negative and ~100,000 positive tweets
# at a random offset
neg_start = random.randint(1, 650000)
pos_start = random.randint(800000, 1500000)
neg = df.loc[neg_start:(neg_start + 100000)]
pos = df.loc[pos_start:(pos_start + 100000)]
df = pd.concat([neg, pos])
print("Data Import Complete")

dataset = preprocessing.getData(df)

from sklearn.feature_extraction.text import TfidfVectorizer
cv = TfidfVectorizer(max_features=1500)
X = cv.fit_transform(dataset).toarray()
y = df.iloc[:, 0].values
print("Vectorization finished")

# Training the Naive Bayes model on the Training set
import sklearn.naive_bayes as nb
classifier = nb.MultinomialNB()
classifier.fit(X, y)
print("training complete")

# adding tweet functionality
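# A hedged sketch of the "tweet functionality" announced above, not the
# author's implementation: score a single new tweet with the TF-IDF
# vectorizer and Naive Bayes classifier fitted earlier in this script. It
# feeds the raw string straight to cv.transform and skips the project's
# preprocessing.getData cleaning, so treat it as illustrative only.
def predict_tweet_sentiment(tweet_text):
    features = cv.transform([tweet_text]).toarray()  # reuse the fitted vocabulary
    return classifier.predict(features)[0]           # label taken from the 'sentiment' column

print(predict_tweet_sentiment("what a lovely sunny morning"))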
# pos = twitter_samples.strings('positive_tweets.json')
# neg = twitter_samples.strings('negative_tweets.json')
# posData = preprocessing.getData(pos)
# negData = preprocessing.getData(neg)
#
# df1 = pd.DataFrame(negData, columns=['Text'])
# df1 = df1.assign(Sent=0)
#
# df2 = pd.DataFrame(posData, columns=['Text'])
# df2.assign(Sent=1)
#
# df = pd.concat([df1, df2, df3])

data = data_structuring.df
dt = preprocessing.getData(data_structuring.arr)

# join each list of tokens back into a single space-separated sentence
dataset = []
for tokens in dt:
    sentence = ""
    for word in tokens:
        sentence = sentence + " " + word
    dataset.append(sentence)

from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=1000)
X = cv.fit_transform(dataset).toarray()
y = data.iloc[:, 1].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
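# A minimal sketch of the standard call the import above prepares for; the
# 0.2 test fraction and random_state here are assumptions, not values taken
# from the original script.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
print(X_train.shape, X_test.shape)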
        5: lambda x, y, param: quadratic(x, y, param),
        6: lambda x, y, param: quadratic(x, y, param),
        7: lambda x, y, param: multiquadric(x, y, param),
        8: lambda x, y, param: multiquadric(x, y, param),
        9: lambda x, y, param: inverse_multiquadric(x, y, param),
        3: lambda x, y, param: sinc(x, y),
        0: lambda x, y, param: laplacean(x, y, param),
    }[i](x, y, param)


for i in range(0, 4):
    f.write("Set " + test[i] + ' ' + train[i] + '\n')
    f.write(
        "kernel; C/gamma; tol; support vectors; training-set accuracy; test-set accuracy\n"
    )
    X, y = getData(sciezka + train[i])
    X_test, y_test = getData(sciezka + test[i])
    for j in range(0, 10):
        gram = get_kernel(j, X, X, 10)
        clf = SVC(C=10, kernel='precomputed', tol=0.000001).fit(gram, y)
        print('Number of support vectors: ', np.sum(clf.n_support_))
        print('Training-set classification accuracy: ', clf.score(gram, y))
        # the test Gram matrix must be built with the same kernel used for
        # training (the original called rbf_kernel here regardless of j)
        gram_test = get_kernel(j, X_test, X, 10)
        print('Test Gram matrix calculated')
        print('Test-set classification accuracy: ', clf.score(gram_test, y_test))
        cpredicted = clf.predict(gram_test)
        f.write(kernels[j])
        f.write(';')
        f.write(str(params[j]))
        f.write(';')
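# A self-contained reference sketch (assumed toy data, not the project's data
# sets) of the shapes scikit-learn's precomputed-kernel interface expects:
# the training Gram matrix is (n_train, n_train) and the test Gram matrix is
# (n_test, n_train), both built with the same kernel function.
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X_tr = rng.randn(20, 3)
y_tr = (X_tr[:, 0] > 0).astype(int)
X_te = rng.randn(5, 3)

gram_tr = rbf_kernel(X_tr, X_tr, gamma=10)   # shape (20, 20)
gram_te = rbf_kernel(X_te, X_tr, gamma=10)   # shape (5, 20)

clf_demo = SVC(C=10, kernel='precomputed').fit(gram_tr, y_tr)
print(clf_demo.predict(gram_te))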
        A2 = Y_tilda = sigmoid(Z2)

        # Backpropagation
        Delta_2 = (Y_tilda - Output) * sigmoidDerivative(A2)
        Delta_1 = W2.T.dot(Delta_2) * sigmoidDerivative(A1)

        # Weight and bias update
        W2 = W2 - alpha * np.dot(Delta_2, A1.T)
        W1 = W1 - alpha * np.dot(Delta_1, Input.T)
        b2 = b2 - alpha * np.sum(Delta_2, axis=1, keepdims=True)
        b1 = b1 - alpha * np.sum(Delta_1, axis=1, keepdims=True)

    # Completed training
    return [W1, b1, sigmoid(W1.dot(Input) + b1)]


# %%
X, Y = getData('data.mat')

# Holdout method
train_percent = 0.7
train_size = int(train_percent * X.shape[0])
train_X = X[:train_size, :]
test_X = X[train_size:, :]
train_Y = Y[:train_size, :]
test_Y = Y[train_size:, :]

train_X = train_X.T
train_Y = train_Y.T
test_X = test_X.T
test_Y = test_Y.T

# %%
# deep layer stacked autoencoder based extreme learning machine
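# The comment above names a stacked-autoencoder-based extreme learning
# machine; as a point of reference only, here is a minimal, self-contained
# NumPy sketch of the plain ELM step (fixed random hidden weights, then a
# least-squares solve for the output weights), not the author's stacked
# version. Sizes and synthetic data are assumptions; samples are stored
# column-wise to match the transposed train_X/train_Y above.
import numpy as np

def _elm_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.RandomState(0)
n_features, n_hidden, n_classes, n_samples = 8, 16, 3, 100
X_cols = rng.randn(n_features, n_samples)                     # (features, samples)
Y_cols = np.eye(n_classes)[rng.randint(0, n_classes, n_samples)].T

W_hidden = rng.randn(n_hidden, n_features)                    # fixed random hidden weights
b_hidden = rng.randn(n_hidden, 1)
H = _elm_sigmoid(W_hidden.dot(X_cols) + b_hidden)             # hidden activations

beta = Y_cols.dot(np.linalg.pinv(H))                          # least-squares output weights
Y_pred = beta.dot(H)
print(Y_pred.shape)                                           # (n_classes, n_samples)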
# ---------------- Import ----------------
from preprocessing import getData, boolRain
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
import sys

# ---------------- Main ----------------
print("---------------- Rain-Predictor ----------------")
print("Train \t (1)\nTest\t (2)\nUse\t (3)")
menu = int(input())

if menu == 1:
    # Pre-processing of data
    x, y = getData()

    # Model
    model = tf.keras.models.Sequential()

    # Layers
    model.add(tf.keras.layers.Dense(3, input_dim=3, activation='relu'))
    model.add(tf.keras.layers.Dense(3, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # Compile
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['binary_accuracy'])

    # Train
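    # A hedged sketch of the training call this step leads into; the epoch
    # count, batch size, validation split, and save path below are
    # assumptions, not the author's settings.
    model.fit(x, y, epochs=100, batch_size=10, validation_split=0.2)
    # model.save('rain_model.h5')   # assumed file name for persisting weights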
from preprocessing import getData
from sklearn.svm import SVR
import kernelFunctions as kf
# import matplotlib.pyplot as plt
# from matplotlib import cm
# from mpl_toolkits.mplot3d import Axes3D

sciezka = './dane/'
print('Using path: ' + sciezka)

sets = [['train6.csv', 'test6.csv'], ['train7.csv', 'test7.csv']]
# each entry is a list of C, gamma and degree
configs = [[1, 0.1, 2], [5, 0.2, 4], [20, 0.4, 8]]

for set_num in range(0, len(sets)):
    xlearn, rlearn = getData(sciezka + sets[set_num][0])
    xtest, rtest = getData(sciezka + sets[set_num][1])
    print(sets[set_num])
    for c_num in range(0, len(configs)):
        svr_rbf = SVR(kernel='rbf', C=configs[c_num][0], gamma=configs[c_num][1])
        svr_lin = SVR(kernel='linear', C=configs[c_num][0])
        svr_poly = SVR(kernel='poly', C=configs[c_num][0], degree=configs[c_num][2])
        r_laplacean = SVR(kernel='precomputed').fit(
            kf.laplacean(xlearn, xlearn, configs[c_num][1]),
            rlearn).predict(kf.laplacean(xtest, xlearn, configs[c_num][1]))
        r_sinc = SVR(kernel='precomputed').fit(
            kf.sinc(xlearn, xlearn, configs[c_num][0]),
        resMoc = MOC4(c[j % 2], cores[j], tols[j % 4], train_mapped, test_mapped,
                      y_train, y_test)
        resOvo = OneVsAll4(c[j % 2], cores[j], tols[j % 4], train_mapped,
                           test_mapped, y_train, y_test)
    else:
        resMoc = MOC16(c[j % 2], cores[j], tols[j % 4], train_mapped, test_mapped,
                       y_train, y_test)
        resOvo = OneVsAll16(c[j % 2], cores[j], tols[j % 4], train_mapped,
                            test_mapped, y_train, y_test)
    writeRow(cores[j], c[j % 2], tols[j % 4], resMoc, resOvo)


f = open('results.csv', 'w')

# for each data set
kernels = [kf.laplacean, kf.sinc, kf.quadratic, kf.multiquadric]
for i in range(0, 4):
    x_train, y_train = getData(sciezka + train[i])
    x_test, y_test = getData(sciezka + test[i])

    # append header to file
    f.write(test[i])
    f.write('\n')
    writeHeader(f)

    # for each configuration
    for j in range(0, 8):
        iteration(i, j, x_train, x_test)
    for j in range(8, 12):
        iteration(i, j, kernels[j - 8](x_train, x_train, 10),
                  kernels[j - 8](x_test, x_test, 10))

f.close()
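# The MOC*/OneVsAll* helpers above are project-specific functions that are not
# shown in this file. Purely as a point of reference (not the project's code),
# here is a small self-contained sketch of the generic one-vs-rest
# decomposition with scikit-learn, using synthetic data and assumed
# parameters.
import numpy as np
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X_demo = rng.randn(60, 4)
y_demo = rng.randint(0, 4, 60)                 # 4 classes, as in MOC4 / OneVsAll4

ovr = OneVsRestClassifier(SVC(C=10, kernel='rbf', tol=1e-6))
ovr.fit(X_demo, y_demo)                        # fits one binary SVC per class
print(ovr.score(X_demo, y_demo))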
        7: lambda x, y, param: multiquadric(x, y, param),
        8: lambda x, y, param: multiquadric(x, y, param),
        9: lambda x, y, param: inverse_multiquadric(x, y, param),
        3: lambda x, y, param: sinc(x, y, param),
        0: lambda x, y, param: laplacean(x, y, param),
    }[i](x, y, param)


print('Using path: ' + sciezka)
for i in range(0, 4):
    f.write("Set " + test[i] + ' ' + train[i] + '\n')
    f.write(
        "kernel; C; tol; gamma; support vectors; training-set accuracy; test-set accuracy\n"
    )
    for j in range(0, 10):
        xlearn, clearn = getData(sciezka + train[i])
        xtest, ctest = getData(sciezka + test[i])
        clf = SVC(C=cs[j], kernel=kernels[j], gamma=gammas[i],
                  tol=tols[j]).fit(xlearn, clearn)
        print('Number of support vectors: ', np.sum(clf.n_support_))
        print('Training-set classification accuracy: ', clf.score(xlearn, clearn))
        print('Test-set classification accuracy: ', clf.score(xtest, ctest))
        cpredicted = clf.predict(xtest)
        print('Comparison of predictions with the actual categories:')
        f.write(kernels[j])
        f.write(';')