def main():
    opt = get_opt()
    if opt.gpu > -1:
        torch.cuda.set_device(opt.gpu)
    run_time_result = [[] for _ in range(opt.run_split_num)]
    all_list = []
    for iter_split_seed in range(opt.run_split_num):
        target_data = load_processed_data(
            opt, opt.data_path, opt.data_name,
            shuffle_seed=opt.shuffle_seed_list[iter_split_seed])
        setattr(opt, 'num_feature', target_data.num_features)
        setattr(opt, 'num_class', target_data.num_classes)
        adj = target_data.edge_index
        for iter_init_seed in range(opt.run_init_num):
            set_seed(seed_list[iter_init_seed], opt.gpu > -1)
            model = get_model(opt)
            best_model = train(model, opt, target_data, adj)
            test_acc = test(opt, best_model, target_data, adj,
                            target_data.test_mask, 'test')
            test_acc = round(test_acc, 4)
            print(test_acc)
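# For reference, a minimal sketch of a set_seed helper matching the call above
# (assumed implementation; the project's actual set_seed, get_opt, get_model,
# train and test helpers are defined elsewhere and may differ).
import random

import numpy as np
import torch


def set_seed(seed, use_cuda):
    # Seed the Python, NumPy and PyTorch RNGs so repeated runs are comparable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_cuda:
        # Also seed every visible GPU (assumes a single-process setup).
        torch.cuda.manual_seed_all(seed)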
    y2_test = np.vstack((y2_test, y2_test_tmp))
    X_test = X_test[1:]
    y1_test = y1_test[1:]
    y2_test = y2_test[1:]
    return X_test, (y1_test, y2_test), (label2class1, label2class2)


if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from load_data import load_processed_data, set_data
    dir_path = os.path.join('c:\\', 'Users', 'aviat', 'Google Drive', 'dl4us',
                            'prj')
    data = load_processed_data(dir_path, cities=('nyc', 'kyoto'), verbose=1)
    (X_train, X_valid, X_test), (y1_train, y1_valid, y1_test), \
        (y2_train, y2_valid, y2_test), (label2class1, label2class2) = set_data(
            data, verbose=1)
    train_gen, steps_per_epoch = create_generator(
        X_train, (y1_train, y2_train), batch_size=32)
    X_gen, (y1_gen, y2_gen) = next(train_gen)
    we_city = zip(
        list(map(lambda i: label2class1[i][1], np.argmax(y1_gen, axis=1))),
        list(map(lambda i: label2class2[i][1], np.argmax(y2_gen, axis=1))))
    for i, (w_or_e, city) in enumerate(we_city):
        # Assumed minimal loop body for illustration: report the decoded
        # label pair for each generated sample.
        print(i, w_or_e, city)
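# A minimal sketch of a batching generator matching the create_generator call
# above (assumed implementation; the project's actual helper may differ).
import numpy as np


def create_generator(X, ys, batch_size=32, shuffle=True):
    y1, y2 = ys
    n = len(X)
    steps_per_epoch = int(np.ceil(n / batch_size))

    def gen():
        # Keras-style infinite generator yielding (X_batch, (y1_batch, y2_batch)).
        while True:
            idx = np.random.permutation(n) if shuffle else np.arange(n)
            for start in range(0, n, batch_size):
                batch = idx[start:start + batch_size]
                yield X[batch], (y1[batch], y2[batch])

    return gen(), steps_per_epoch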
from sklearn.model_selection import train_test_split
from gensim.models.word2vec import Word2Vec
from load_data import load_train_data, load_processed_data
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# The following is useful:
# train_test_split(np.array(texts), np.array(sentiment), test_size=0.2)
x_train, y_train = load_processed_data(stem=False)
x_test, y_test = load_processed_data(data_type='test', stem=False)
from preprocess import preprocessor as preprocess

n_dim = 300
scaling = False

# Build a word vector for each tweet in the training set by averaging the
# vectors of all words it contains, then optionally scale.
from load_data import load_word_embedding

imdb_w2v = load_word_embedding()


def buildWordVector(text, size):
    vec = np.zeros(size).reshape((1, size))
    count = 0.
    for word in text:
        try:
            vec += imdb_w2v[word].reshape((1, size))
            count += 1.
        except KeyError:
            # Assumed completion: skip words missing from the embedding
            # vocabulary and average over the words that were found.
            continue
    if count != 0:
        vec /= count
    return vec
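# A minimal usage sketch (assumed, not part of the original script): stack the
# per-tweet averaged vectors into train/test matrices and, if scaling is
# enabled, rescale them with the MinMaxScaler imported above.
train_vecs = np.concatenate([buildWordVector(t, n_dim) for t in x_train])
test_vecs = np.concatenate([buildWordVector(t, n_dim) for t in x_test])
if scaling:
    scaler = MinMaxScaler()
    train_vecs = scaler.fit_transform(train_vecs)
    test_vecs = scaler.transform(test_vecs)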
from load_data import load_processed_data
from qrcode_generator import to_qrcode
import numpy as np

texts, labels = load_processed_data(data_type='train', stem=False)

feature_vec = []
i = 0
for text, label in zip(texts, labels):
    # Render the text as a QR code, binarize its pixels, and prepend the label.
    text_qrcode = to_qrcode(text)
    text_qrcode = np.array(list(text_qrcode.getdata()))
    text_qrcode[text_qrcode > 0] = 1
    feature_vec.append(np.append(label, text_qrcode))

from save_data import csv_save

csv_save(feature_vec, './data/traindata/qrcode_20000.csv')
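# A small follow-up sketch (assumed, not part of the original script): reload
# the saved matrix and split the label column from the flattened QR-code
# pixels. This assumes csv_save wrote plain comma-separated numeric rows.
import numpy as np

saved = np.loadtxt('./data/traindata/qrcode_20000.csv', delimiter=',')
labels_loaded, pixels = saved[:, 0], saved[:, 1:]
print(labels_loaded.shape, pixels.shape)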
__author__ = 'hs'
__author__ = 'NLP-PC'
import feature_generating
import classifiers
import analysis
from load_data import load_train_data, load_processed_data
from load_data import load_test_data
from save_data import dump_picle
from vectorizers import TFIDF_estimator, anew_estimator
from analysis import analysis_result
from classifiers import mNB

print('Start')
vectorizer = anew_estimator()
texts, train_labels = load_processed_data()
transformed_train = vectorizer.fit_transform(texts)
testdata, true_labels = load_processed_data(data_type='test')
transformed_test = vectorizer.transform(testdata)
predict = mNB(transformed_train, train_labels, transformed_test)
analysis_result(predict, true_labels)
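# For reference, a minimal sketch of a multinomial Naive Bayes helper like the
# mNB imported above (assumed implementation wrapping scikit-learn's
# MultinomialNB; the project's classifiers.mNB may differ).
from sklearn.naive_bayes import MultinomialNB


def multinomial_nb_predict(train_X, train_y, test_X):
    # Fit on the vectorized training texts, then predict labels for the test set.
    clf = MultinomialNB()
    clf.fit(train_X, train_y)
    return clf.predict(test_X)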
        print(
            'returned: extracted_data\n - extracted_data: {} length tuple whose element\'s shape is (city_name, (pld_train, pld_test), (npld_train, npld_test))'
            .format(len(cities)))
    else:
        print(
            'returned: extracted_data\n - extracted_data: {} length tuple whose element\'s shape is (city_name, (pld_train, pld_test))'
            .format(len(cities)))
    return extracted_data  # original_data


if __name__ == '__main__':
    from load_data import load_processed_data
    dir_path = os.path.join('c:\\', 'Users', 'aviat', 'Google Drive', 'dl4us',
                            'prj')
    data = load_processed_data(dir_path, cities=('kyoto', ))
    tmp_data = (('kyoto', data[0][1][:10]), )
    extracted_data, original_data = extract(
        tmp_data,
        weights='places',
        pooling='avg',
        test_size=0.1,
        random_state=42,
        augment=True,
        augment_mode=0,
        augment_times=1,
    )
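    # Quick sanity check (assumed follow-up, not part of the original script):
    # per the messages printed by extract() above, each element of
    # extracted_data is (city_name, (pld_train, pld_test)) and, when
    # augmented, additionally carries (npld_train, npld_test).
    for element in extracted_data:
        print(element[0], len(element))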