def main():

    opt = get_opt()
    if opt.gpu > -1: torch.cuda.set_device(opt.gpu)

    run_time_result = [[] for _ in range(opt.run_split_num)]

    all_list = []

    for iter_split_seed in range(opt.run_split_num):

        target_data = load_processed_data(
            opt,
            opt.data_path,
            opt.data_name,
            shuffle_seed=opt.shuffle_seed_list[iter_split_seed])
        setattr(opt, 'num_feature', target_data.num_features)
        setattr(opt, 'num_class', target_data.num_classes)

        adj = target_data.edge_index

        for iter_init_seed in range(opt.run_init_num):

            # seed_list: per-initialization random seeds, defined outside this snippet
            set_seed(seed_list[iter_init_seed], opt.gpu > -1)
            model = get_model(opt)

            best_model = train(model, opt, target_data, adj)

            test_acc = test(opt, best_model, target_data, adj,
                            target_data.test_mask, 'test')

            test_acc = round(test_acc, 4)

            print(test_acc)
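            # --- Assumed continuation (the original example is truncated here). ---
            # run_time_result and all_list are defined above but never used in the
            # shown code; a plausible continuation records each run's accuracy.
            run_time_result[iter_split_seed].append(test_acc)
            all_list.append(test_acc)

    # Hypothetical summary over all splits and initializations.
    print('mean test acc over {} runs: {:.4f}'.format(
        len(all_list), sum(all_list) / len(all_list)))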
Example #2
        y2_test = np.vstack((y2_test, y2_test_tmp))

    X_test = X_test[1:]
    y1_test = y1_test[1:]
    y2_test = y2_test[1:]

    return X_test, (y1_test, y2_test), (label2class1, label2class2)


if __name__ == '__main__':
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from load_data import load_processed_data, set_data

    dir_path = os.path.join('c:\\', 'Users', 'aviat', 'Google Drive', 'dl4us',
                            'prj')
    data = load_processed_data(dir_path, cities=('nyc', 'kyoto'), verbose=1)
    (X_train, X_valid,
     X_test), (y1_train, y1_valid,
               y1_test), (y2_train, y2_valid,
                          y2_test), (label2class1, label2class2) = set_data(
                              data, verbose=1)

    train_gen, steps_per_epoch = create_generator(
        X_train, (y1_train, y2_train), batch_size=32)

    X_gen, (y1_gen, y2_gen) = next(train_gen)

    we_city = zip(
        list(map(lambda i: label2class1[i][1], np.argmax(y1_gen, axis=1))),
        list(map(lambda i: label2class2[i][1], np.argmax(y2_gen, axis=1))))
    for i, (w_or_e, city) in enumerate(we_city):
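        # --- Assumed completion (the original example is truncated here). ---
        # A plausible loop body: display each generated sample with its decoded labels.
        plt.imshow(X_gen[i])
        plt.title('{} - {}'.format(w_or_e, city))
        plt.show()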
Example #3
from sklearn.model_selection import train_test_split
from gensim.models.word2vec import Word2Vec
from load_data import load_train_data, load_processed_data
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# The following call is useful:
# train_test_split(np.array(texts), np.array(sentiment), test_size=0.2)

x_train, y_train = load_processed_data(stem=False)
x_test, y_test = load_processed_data(data_type='test', stem=False)

from preprocess import preprocessor as preprocess

n_dim = 300
scaling = False


# Build word vector for training set by using the average value of all word vectors in the tweet, then scale
from load_data import load_word_embedding

imdb_w2v = load_word_embedding()


def buildWordVector(text, size):
    vec = np.zeros(size).reshape((1, size))
    count = 0.
    for word in text:
        try:
            vec += imdb_w2v[word].reshape((1, size))
            count += 1.
        except KeyError:  # word not in the embedding vocabulary
            continue
    if count != 0:
        vec /= count
    return vec
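
# --- Assumed usage (not part of the original snippet). ---
# Average each tweet's word vectors into a single n_dim feature row; optionally
# scale with the MinMaxScaler imported above when `scaling` is True.
train_vecs = np.concatenate([buildWordVector(text, n_dim) for text in x_train])
test_vecs = np.concatenate([buildWordVector(text, n_dim) for text in x_test])
if scaling:
    scaler = MinMaxScaler()
    train_vecs = scaler.fit_transform(train_vecs)
    test_vecs = scaler.transform(test_vecs)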
Example #5
from load_data import load_processed_data
from qrcode_generator import to_qrcode
import numpy as np

texts, labels = load_processed_data(data_type='train', stem=False)
feature_vec = []
i = 0
for text, label in zip(texts, labels):
    text_qrcode = to_qrcode(text)
    text_qrcode = np.array(list(text_qrcode.getdata()))
    text_qrcode[text_qrcode > 0] = 1
    feature_vec.append(np.append(label, text_qrcode))

from save_data import csv_save

csv_save(feature_vec, './data/traindata/qrcode_20000.csv')
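
# --- Assumed follow-up (not part of the original snippet). ---
# If csv_save writes a plain comma-separated matrix, the file can be read back
# with column 0 as the label and the remaining columns as the flattened QR bitmap.
saved = np.loadtxt('./data/traindata/qrcode_20000.csv', delimiter=',')
labels_back, qr_features = saved[:, 0], saved[:, 1:]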
Example #6
__author__ = 'hs'
__author__ = 'NLP-PC'
import feature_generating
import classifiers
import analysis
from load_data import load_train_data, load_processed_data
from load_data import load_test_data
from save_data import dump_picle
from vectorizers import TFIDF_estimator, anew_estimator
from analysis import analysis_result
from classifiers import mNB

print('Start')
vectorizer = anew_estimator()
texts, train_labels = load_processed_data()
transformed_train = vectorizer.fit_transform(texts)
testdata, true_labels = load_processed_data(data_type='test')
transformed_test = vectorizer.transform(testdata)

predict = mNB(transformed_train, train_labels, transformed_test)

analysis_result(predict, true_labels)
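
# --- Assumed sketch (mNB is a project-specific helper; this only guesses at its shape). ---
# A minimal equivalent using scikit-learn's multinomial Naive Bayes directly:
from sklearn.naive_bayes import MultinomialNB

clf = MultinomialNB()
clf.fit(transformed_train, train_labels)
predict_nb = clf.predict(transformed_test)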
Example #7
            print(
                'returned: extracted_data\n'
                ' - extracted_data: tuple of length {} whose elements have the form '
                '(city_name, (pld_train, pld_test), (npld_train, npld_test))'.format(
                    len(cities)))
        else:
            print(
                'returned: extracted_data\n'
                ' - extracted_data: tuple of length {} whose elements have the form '
                '(city_name, (pld_train, pld_test))'.format(len(cities)))

    return extracted_data  # original_data


if __name__ == '__main__':
    import os
    from load_data import load_processed_data

    dir_path = os.path.join('c:\\', 'Users', 'aviat', 'Google Drive', 'dl4us',
                            'prj')

    data = load_processed_data(dir_path, cities=('kyoto', ))
    tmp_data = (('kyoto', data[0][1][:10]), )

    # extract() as shown above returns only extracted_data
    extracted_data = extract(
        tmp_data,
        weights='places',
        pooling='avg',
        test_size=0.1,
        random_state=42,
        augment=True,
        augment_mode=0,
        augment_times=1,
    )
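
    # --- Assumed follow-up (not part of the original snippet). ---
    # Inspect the per-city structure described by the verbose messages above.
    for element in extracted_data:
        city_name = element[0]
        print(city_name, [type(part).__name__ for part in element[1:]])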