import os
from typing import List

import numpy as np

import PSO
import Utils


def pso_train(doc_dir: str, ref_dir: str, config):
    docs = sorted(os.listdir(doc_dir))
    refs = sorted(os.listdir(ref_dir))
    documents: List[List[List[str]]] = []
    references: List[str] = []
    features = []
    for d, r in zip(docs, refs):
        doc, ref = Utils.load_document(doc_dir + "/" + d, ref_dir + "/" + r)
        p_doc: List[List[str]] = Utils.process_document(
            doc, config.use_stopwords, config.use_lemmatizer)
        p_doc_wo_title = Utils.process_document(
            Utils.remove_headings(doc), config.use_stopwords, config.use_lemmatizer)
        p_ref: str = Utils.join_sentences(
            Utils.process_document(ref, config.use_stopwords, config.use_lemmatizer))
        features.append(PSO.extract_features(p_doc, config))
        documents.append(p_doc_wo_title)
        references.append(p_ref)

    # Initialize a Binary PSO model.
    # Example invocation:
    # python main.py -mode train -w_max 0.9 -w_min 0.4 -v_max 4 -v_min -4 -c1 1 -c2 1
    #     -num_particles 2 -num_iterations 20 -num_features 3 -summary_size 75
    #     -similarity_score 0.12 -n_grams 1 -freq_thresh 0.4 -max_sent_thresh 0.8
    #     -min_sent_thresh 0.2 -use_stopwords True -use_lemmatizer False -file None -index 25
    # CHECK: should w come from config (w_max/w_min) instead of the hard-coded 0.9?
    model = PSO.Swarm(documents, references,
                      n_features=config.num_features,
                      n_particles=config.num_particles,
                      n_iterations=config.num_iterations,
                      w=0.9, c1=config.c1, c2=config.c2,
                      sum_size=config.summary_size,
                      config=config)

    # Train the model with extracted features.
    weights = model.train(features)

    # Generate a summary for each document with the learned weights and score it.
    rouge_scores = [0.0] * len(documents)
    for i, feature in enumerate(features):
        # Each row of `feature` is one sentence's feature vector, so the dot
        # product gives one score per sentence; keep the summary_size best.
        p_sum_idx = np.argsort(np.dot(feature, weights))[-config.summary_size:]
        p_sum = Utils.join_sentences([documents[i][idx] for idx in p_sum_idx])
        rouge_scores[i] = Utils.calculate_rouge(p_sum, [references[i]], 1)
    print(rouge_scores)
    print(weights)
    return weights
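
# A minimal usage sketch for pso_train (hypothetical: the directory paths and
# the exact set of config attributes are assumptions based on the CLI flags
# shown in the comment above, not part of the original file).
from types import SimpleNamespace

config = SimpleNamespace(
    num_features=3, num_particles=2, num_iterations=20,
    c1=1.0, c2=1.0, summary_size=75,
    use_stopwords=True, use_lemmatizer=False)
weights = pso_train("history_ncert_class10/chapters",
                    "history_ncert_class10/annotations", config)
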
fn = "history_ncert_class10/chap_3.txt" # sys.argv[1] ref_dir_n = "history_ncert_class10/annotations/chapter3" # sys.argv[2] # load file document, ref_sum = Utils.load_documents(fn, ref_dir_n) # Pre-process with Stemmer and/or Lemmatizer. processed_doc = Utils.process_document(document) processed_ref_sum = Utils.process_documents(ref_sum) # Extract features features = PSO.extract_features(processed_doc) # Initialize a Binary PSO model. model = PSO.Swarm(processed_doc, processed_ref_sum) # Train the model with extracted features. weights = model.train(features) # Generate summary with weights. p_sum_idx = np.argsort(np.dot(features, weights))[-PSO.SUMMARY_SIZE:] p_sum = Utils.generate_summary([document[idx] for idx in p_sum_idx]) p_sum1 = Utils.join_sentences([processed_doc[idx] for idx in p_sum_idx]) ref_sum = Utils.join_docs(processed_ref_sum) print("Final Rouge Score: ", Utils.calculate_rouge(p_sum1, ref_sum, 1)) f = open("predicted_summary.txt", 'w', encoding='utf-8') f.write(p_sum) f.close()
import numpy as np
import scipy.io
from scipy.spatial.distance import cdist
from sklearn import preprocessing, svm
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier

import Problem
import PSO


def main(dataset, run, alg):
    '''
    :param dataset: name of the .mat dataset to load
    :param run: run index, used to seed the random generator
    :param alg: algorithm to apply, 'PSO' or 'PSOL'
    :return:
    '''
    # Set the seed for np.random; this seed drives the evolution of the
    # solutions in both PSO and PSOL.
    np.random.seed(1617 * run)

    # Load data.
    mat = scipy.io.loadmat('/home/nguyenhoai2/Grid/data/FSMathlab/' + dataset + '.mat')
    X = mat['X']  # data
    X = X.astype(float)
    y = mat['Y']  # label
    y = y[:, 0]

    # Ensure that the y labels start from 0, not 1.
    classes, class_counts = np.unique(y, return_counts=True)
    n_classes = classes.shape[0]
    min_class = np.min(class_counts)
    if np.max(y) >= n_classes:
        y = y - 1
    n_features = X.shape[1]

    # Ensure that the division is the same for all algorithms, in all runs.
    # shuffle=True is required for random_state to take effect.
    n_splits = min(min_class, 10)
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1617)
    to_print = 'Apply %d folds\n' % n_splits

    if alg == 'PSO':
        if n_features < 100:
            # Integer step: n_features / 10 would be a float under Python 3.
            num_selected_features = list(range(1, n_features, max(1, n_features // 10)))
        else:
            num_selected_features = list(range(10, 110, 10))
        selected_test_svm = np.array([0.0] * len(num_selected_features))
        selected_test_knn = np.array([0.0] * len(num_selected_features))
        full_test_svm = []
        full_test_knn = []
        for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
            to_print += '=========Fold ' + str(fold) + '=========\n'
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Normalize the data: standardize, then rescale to [0, 1].
            scaler = preprocessing.StandardScaler().fit(X_train)
            X_train = scaler.transform(X_train)
            X_test = scaler.transform(X_test)
            X_train = np.nan_to_num(X_train)
            X_test = np.nan_to_num(X_test)
            normalize = preprocessing.MinMaxScaler().fit(X_train)
            X_train = normalize.transform(X_train)
            X_test = normalize.transform(X_test)
            X_train = np.nan_to_num(X_train)
            X_test = np.nan_to_num(X_test)

            # Baseline results on the full feature set.
            clf = svm.LinearSVC(random_state=1617)
            clf.fit(X_train, y_train)
            full_test_svm.append(np.mean(clf.predict(X_test) == y_test))
            clf = KNeighborsClassifier()
            clf.fit(X_train, y_train)
            full_test_knn.append(np.mean(clf.predict(X_test) == y_test))

            # Prepare for PSO: one centroid per class, so a particle encodes
            # n_classes * n_features positions.
            n_part = 50
            n_iter = 1000
            max_range = X_train.max(axis=0)
            min_range = X_train.min(axis=0)
            max_pos = np.tile(max_range, (n_classes, ))
            min_pos = np.tile(min_range, (n_classes, ))
            length = n_features * n_classes
            max_vel = np.array([0.05] * max_pos.shape[0])
            min_vel = -max_vel
            prob = Problem.CentroidClassification(minimized=True, X=X_train, y=y_train)
            swarm = PSO.Swarm(n_particle=n_part, length=length, problem=prob,
                              n_iterations=n_iter, max_pos=max_pos, min_pos=min_pos,
                              max_vel=max_vel, min_vel=min_vel)
            sol, fit, loss, dist = swarm.iterate()
            centroids = np.reshape(sol, (n_classes, n_features))
            normalize = preprocessing.MinMaxScaler().fit(centroids)
            centroids_n = normalize.transform(centroids)

            # Rank features by the variance of the normalized centroids:
            # higher variance across class centroids means more discriminative.
            variances = np.var(centroids_n, axis=0)
            idx = np.argsort(variances)[::-1]
            for index, n_selected in enumerate(num_selected_features):
                X_train_selected = X_train[:, idx[0:n_selected]]
                X_test_selected = X_test[:, idx[0:n_selected]]
                # D_train = cdist(X_train, centroids)
                # pseu_train = np.argmin(D_train, axis=1)
                # print("Training accuracy: %f" % np.mean(y_train == pseu_train))
                #
                # D_test = cdist(X_test, centroids)
                # pseu_test = np.argmin(D_test, axis=1)
                # print("Testing accuracy: %f" % np.mean(y_test == pseu_test))
                clf = svm.LinearSVC(random_state=1617)
                clf.fit(X_train_selected, y_train)
                selected_test_svm[index] += np.mean(
                    clf.predict(X_test_selected) == y_test)
                clf = KNeighborsClassifier()
                clf.fit(X_train_selected, y_train)
                selected_test_knn[index] += np.mean(
                    clf.predict(X_test_selected) == y_test)

        # Average the per-fold accuracies.
        selected_test_svm = np.array(selected_test_svm) / n_splits
        selected_test_knn = np.array(selected_test_knn) / n_splits
        test_svm = np.mean(full_test_svm)
        test_knn = np.mean(full_test_knn)

        print("-------------------KNN----------------------")
        print('Full test: %f' % test_knn)
        for n_selected, selected_test in zip(num_selected_features, selected_test_knn):
            print('%d features: %f' % (n_selected, selected_test))
        print("-------------------SVM----------------------")
        print('Full test: %f' % test_svm)
        for n_selected, selected_test in zip(num_selected_features, selected_test_svm):
            print('%d features: %f' % (n_selected, selected_test))
    elif alg == 'PSOL':
        num_selected_features = []
        selected_test_knn = []
        selected_test_svm = []
        selected_test_embed = []
        full_test_svm = []
        full_test_knn = []
        for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
            to_print += '=========Fold ' + str(fold) + '=========\n'
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Normalize the data: standardize, then rescale to [0, 1].
            scaler = preprocessing.StandardScaler().fit(X_train)
            X_train = scaler.transform(X_train)
            X_test = scaler.transform(X_test)
            X_train = np.nan_to_num(X_train)
            X_test = np.nan_to_num(X_test)
            normalize = preprocessing.MinMaxScaler().fit(X_train)
            X_train = normalize.transform(X_train)
            X_test = normalize.transform(X_test)
            X_train = np.nan_to_num(X_train)
            X_test = np.nan_to_num(X_test)

            # Baseline results on the full feature set.
            clf = svm.LinearSVC(random_state=1617)
            clf.fit(X_train, y_train)
            full_test_svm.append(np.mean(clf.predict(X_test) == y_test))
            clf = KNeighborsClassifier()
            clf.fit(X_train, y_train)
            full_test_knn.append(np.mean(clf.predict(X_test) == y_test))

            # Prepare for PSO: as in the PSO branch, plus one extra dimension
            # in [0, 1] that encodes the fraction of features to keep.
            n_part = 50
            n_iter = 1000
            max_range = X_train.max(axis=0)
            min_range = X_train.min(axis=0)
            max_pos = np.tile(max_range, (n_classes, ))
            max_pos = np.append(max_pos, 1.0)
            min_pos = np.tile(min_range, (n_classes, ))
            min_pos = np.append(min_pos, 0.0)
            length = n_features * n_classes + 1
            max_vel = np.array([0.05] * max_pos.shape[0])
            min_vel = -max_vel
            prob = Problem.CentroidClassificationLimit(minimized=True, X=X_train, y=y_train)
            swarm = PSO.Swarm(n_particle=n_part, length=length, problem=prob,
                              n_iterations=n_iter, max_pos=max_pos, min_pos=min_pos,
                              max_vel=max_vel, min_vel=min_vel)
            sol, fit, loss, dist = swarm.iterate()
            centroids = np.reshape(sol[0:n_features * n_classes],
                                   (n_classes, n_features))
            n_selected_features = int(sol[n_features * n_classes] * n_features)
            # normalize = preprocessing.MinMaxScaler().fit(centroids)
            # centroids_n = normalize.transform(centroids)
            variances = np.var(centroids, axis=0)
            idx = np.argsort(variances)[::-1]
            X_train_selected = X_train[:, idx[0:n_selected_features]]
            X_test_selected = X_test[:, idx[0:n_selected_features]]
            centroids_selected = centroids[:, idx[0:n_selected_features]]
            num_selected_features.append(n_selected_features)

            # Classify directly with the evolved centroids: nearest centroid
            # under the Manhattan (cityblock) distance.
            D = cdist(X_test_selected, centroids_selected, metric='cityblock')
            pseu = np.argmin(D, axis=1)
            selected_test_embed.append(np.mean(pseu == y_test))

            clf = svm.LinearSVC(random_state=1617)
            clf.fit(X_train_selected, y_train)
            selected_test_svm.append(
                np.mean(clf.predict(X_test_selected) == y_test))
            clf = KNeighborsClassifier()
            clf.fit(X_train_selected, y_train)
            selected_test_knn.append(
                np.mean(clf.predict(X_test_selected) == y_test))
            print(selected_test_embed[-1])
            print(full_test_knn[-1], selected_test_knn[-1])
            print(full_test_svm[-1], selected_test_svm[-1])

        print("-------------------Centroid----------------------")
        print('Centroid: %f' % np.mean(selected_test_embed))
        print("-------------------KNN----------------------")
        print('Full test: %f' % np.mean(full_test_knn))
        print('Select %f features with accuracy of %f' % (
            np.mean(num_selected_features), np.mean(selected_test_knn)))
        print("-------------------SVM----------------------")
        print('Full test: %f' % np.mean(full_test_svm))
        print('Select %f features with accuracy of %f' % (
            np.mean(num_selected_features), np.mean(selected_test_svm)))
    else:
        raise Exception('Algorithm %s has not been implemented!!!!' % alg)
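
# Hypothetical driver for main(). Scripts like this are typically launched
# with the dataset name, run index, and algorithm taken from the command
# line; the argument order here is an assumption, not part of the original.
if __name__ == '__main__':
    import sys
    dataset_name = sys.argv[1]    # stem of a .mat file under FSMathlab/
    run_index = int(sys.argv[2])  # seeds np.random via 1617 * run
    algorithm = sys.argv[3]       # 'PSO' or 'PSOL'
    main(dataset_name, run_index, algorithm)
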
import os

import matplotlib.pyplot as plt

import PSO
import Utils

directory = 'graphs/function'
if not os.path.exists(directory):
    os.makedirs(directory)
directoryboxplot = 'graphs/boxplot'
if not os.path.exists(directoryboxplot):
    os.makedirs(directoryboxplot)

particleSwarmOpt = PSO.Swarm()
for inertia in Utils.constants.inertiaTypes:
    for function in Utils.constants.functionTypes:
        listselems = []
        subtitlesElems = []
        finals = []
        for comunication in Utils.constants.comunicationTypes:
            listelems, finalgbestLists = particleSwarmOpt.run(
                inertia, comunication, function)
            listselems.append(listelems)
            subtitlesElems.append(comunication.name)
            finals.append(finalgbestLists)

        # Plot the convergence curve of each communication topology on one figure.
        plt.figure()
        for i in range(len(listselems)):
            funcselems = listselems[i]
            plt.plot(funcselems, label=subtitlesElems[i])
        plt.legend()
        plt.savefig(os.path.join(directory,
                                 function.name + '_' + inertia.name + '.png'))
        plt.close()  # otherwise curves accumulate across function/inertia pairs
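
# The 'graphs/boxplot' directory is created above but never written to. Below
# is a minimal sketch of a presumably intended boxplot step; it would go inside
# the per-function loop, right after the existing savefig. It assumes each
# entry of `finals` is the list of final gbest values for one communication
# topology (an assumption about the return value of Swarm.run).
plt.figure()
plt.boxplot(finals, labels=subtitlesElems)
plt.title(function.name + ' - ' + inertia.name)
plt.savefig(os.path.join(directoryboxplot,
                         function.name + '_' + inertia.name + '.png'))
plt.close()
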
import PSO
from MyMath import distance3d

# A simple 3D geometric search example.
# The heuristic for each particle is the inverse (negated) distance from
# DESTINATION.
# Verdict: works pretty well; runs into some trouble due to the distance
# function, but it did help me debug the PSO.

DESTINATION = (25, 25, 25)
geometric_mins = (0, 0, 0)
geometric_maxs = (50, 50, 50)
geometric_dimensions = 3
geometric_particles = 20

geometric_search = PSO.Swarm(geometric_particles, geometric_dimensions,
                             geometric_mins, geometric_maxs)
thisIteration = 0
bestFoundScore = -float('inf')

print("Looking for point 25,25,25")
while geometric_search.getIterations() < 100:
    currentParticle = geometric_search.getCurrentParticle()
    particleLocation = currentParticle.getPosition()
    distanceFromTarget = distance3d(DESTINATION, particleLocation)
    currentParticle.setHeuristic(-distanceFromTarget)  # since heuristic is maximized
    if -distanceFromTarget > bestFoundScore:
        bestFoundScore = -distanceFromTarget
    geometric_search.tickCurrentParticle()
    if geometric_search.getIterations() > thisIteration:
        # The original file is truncated here; this body is a plausible
        # reconstruction (assumption): report progress once per completed
        # iteration of the swarm.
        thisIteration = geometric_search.getIterations()
        print("Iteration %d, best score so far: %f" % (thisIteration, bestFoundScore))
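
# A minimal stand-in for MyMath.distance3d, useful for running this example
# without the MyMath module (hypothetical: the real helper may differ, e.g.
# in how it handles the troublesome cases mentioned above).
import math

def distance3d(a, b):
    # Straight-line (Euclidean) distance between two 3D points.
    return math.sqrt(sum((ai - bi) ** 2 for ai, bi in zip(a, b)))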