def synthetic_data():
    print("synthetic_data task")
    # Train the simple copy task.
    V = 11
    criterion = model_help.LabelSmoothing(size=V, padding_idx=0, smoothing=0.1)
    criterion.cuda()
    model = model_help.make_model(V, V, N=2)
    model.cuda()
    model_opt = model_help.NoamOpt(
        model.src_embed[0].d_model, 1, 400,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98),
                         eps=1e-9))

    for epoch in range(10):
        # One pass over freshly generated training batches.
        model.train()
        run_epoch(
            data_help.data_gen(V, 30, 20), model,
            model_help.SimpleLossCompute(model.generator, criterion,
                                         args.device, model_opt))
        # Evaluate on a smaller set of generated batches.
        model.eval()
        eval_loss = run_epoch(
            data_help.data_gen(V, 30, 5), model,
            model_help.SimpleLossCompute(model.generator, criterion,
                                         args.device, None))
        print("eval loss: %f" % float(eval_loss))

    # Sanity check: greedily decode a fixed input sequence.
    model.eval()
    src = Variable(torch.LongTensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]))
    src_mask = Variable(torch.ones(1, 1, 10))
    print(
        greedy_decode(model,
                      src.to(args.device),
                      src_mask.to(args.device),
                      max_len=10,
                      start_symbol=1))
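# For reference, a minimal greedy_decode sketch in the style of the Annotated
# Transformer. The encode/decode/generator API and a subsequent_mask helper
# are assumed to exist in model_help/data_help; this is an illustrative
# sketch, not necessarily the repository's exact implementation.
def greedy_decode_sketch(model, src, src_mask, max_len, start_symbol):
    # Encode the source once, then extend the output one token at a time.
    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type_as(src.data)
    for _ in range(max_len - 1):
        out = model.decode(memory, src_mask, ys,
                           subsequent_mask(ys.size(1)).type_as(src.data))
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim=1)
        ys = torch.cat(
            [ys, torch.ones(1, 1).type_as(src.data).fill_(next_word.item())],
            dim=1)
    return ys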
def main():
    os.makedirs('checkpoint', exist_ok=True)
    V = 11
    num_epochs = 10
    batch_size = 30
    train_batches = 20
    test_batches = 5
    criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
    model = make_model(V, V, N=2).to(device)
    model_opt = NoamOpt(
        model.src_embed[0].d_model, 1, 400,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98),
                         eps=1e-9))

    for epoch in range(num_epochs):
        # training
        model.train()
        train_loss = run_epoch(
            data_gen(V, batch_size=batch_size, nbatches=train_batches), model,
            SimpleLossCompute(model.generator, criterion, model_opt))
        experiment.log_metric('train_loss', train_loss, step=epoch)

        # validation
        model.eval()
        valid_loss = run_epoch(
            data_gen(V, batch_size=batch_size, nbatches=test_batches), model,
            SimpleLossCompute(model.generator, criterion, None))
        experiment.log_metric('valid_loss', valid_loss, step=epoch)
        print('valid_loss:', valid_loss)

    torch.save(model.state_dict(), 'checkpoint/model.pt')
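# For reference, a minimal sketch of the copy-task batch generator used above,
# adapted from the Annotated Transformer. The Batch wrapper with signature
# (src, tgt, pad) is assumed to match the one used in this repository.
def data_gen_sketch(V, batch_size, nbatches):
    # Random token sequences over [1, V); the target is a copy of the source.
    for _ in range(nbatches):
        data = torch.randint(1, V, (batch_size, 10))
        data[:, 0] = 1  # fixed start symbol
        yield Batch(data.clone(), data.clone(), 0)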
def gridsearch(params):
    max_sequence_length = reader.max_sentence_length
    random_init = (params.wordvec_initialization == 'random')

    train_test_val = reader.create_batch(embedding_params=embedding_params,
                                         batch_size=-1)
    training_data = train_test_val['train']
    test_data = train_test_val['test']
    validation_data = train_test_val['dev']

    # for x, y in batch_gen(training_data, max_sequence_length):
    #     model.train_on_batch(x, y)
    train_x, train_y = data_gen(training_data, max_sequence_length)
    test_x, test_y = data_gen(test_data, max_sequence_length)
    val_x, val_y = data_gen(validation_data, max_sequence_length)

    train_y = to_categorical(train_y)
    test_y = to_categorical(test_y)
    val_y = to_categorical(val_y)

    # Full grid would be [0.0, 0.1, ..., 0.9]; reduced to keep the search cheap.
    dropout_rate = [0.5, 0.9]
    param_grid = dict(dropout_rate=dropout_rate)
    # ,validation_data=(test_x, test_y)

    model = KerasClassifier(build_fn=createModel,
                            epochs=1,
                            batch_size=params.batch_size,
                            verbose=1)
    grid = GridSearchCV(estimator=model, param_grid=param_grid,
                        n_jobs=2)  # n_jobs=-1 to use all cores
    grid_result = grid.fit(train_x, train_y)

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_,
                                 grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    candidates = grid_result.cv_results_['params']
    for mean, std, candidate in zip(means, stds, candidates):
        print("%f (%f) with: %r" % (mean, std, candidate))

    # Record the best score in the shared result sheet; the sheet's default
    # integer index is assumed.
    experiment_results_path = 'eval/experiment_result.xlsx'
    df1 = pd.read_excel(experiment_results_path, sheet_name='Sheet1')
    row = {'complex_mixture': 0, 'complex_superposition': 1, 'real': 2}
    df1.loc[row[params.network_type],
            params.dataset_name] = grid_result.best_score_
    df1.to_excel(experiment_results_path)
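# A hypothetical createModel builder for the KerasClassifier above;
# GridSearchCV calls it once per candidate dropout_rate. The layer widths and
# the flat input size are placeholders, not the repository's real network.
from keras.models import Sequential
from keras.layers import Dense, Dropout


def createModel_sketch(dropout_rate=0.0, input_dim=100):
    # input_dim is a placeholder for the flattened feature size.
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(input_dim, )))
    model.add(Dropout(dropout_rate))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model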
def main():
    path_to_vec = '../glove/glove.6B.100d.txt'
    dir_name = '../'
    reader = SSTDataReader(dir_name, nclasses=2)
    embedding_params = reader.get_word_embedding(path_to_vec,
                                                 orthonormalized=False)
    lookup_table = get_lookup_table(embedding_params)

    max_sequence_length = 60
    sequence_input = Input(shape=(max_sequence_length, ), dtype='int32')
    phase_embedding = phase_embedding_layer(max_sequence_length,
                                            lookup_table.shape[0])
    # The amplitude path is currently unused (ComplexMultiply is disabled).
    amplitude_embedding = amplitude_embedding_layer(np.transpose(lookup_table),
                                                    max_sequence_length)
    # [embed_seq_real, embed_seq_imag] = ComplexMultiply()(
    #     [phase_embedding, amplitude_embedding])
    output = phase_embedding(sequence_input)

    model = Model(sequence_input, output)
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    model.summary()

    train_test_val = reader.create_batch(embedding_params=embedding_params,
                                         batch_size=-1)
    training_data = train_test_val['train']
    test_data = train_test_val['test']
    validation_data = train_test_val['dev']

    # for x, y in batch_gen(training_data, max_sequence_length):
    #     model.train_on_batch(x, y)
    train_x, train_y = data_gen(training_data, max_sequence_length)
    test_x, test_y = data_gen(test_data, max_sequence_length)
    val_x, val_y = data_gen(validation_data, max_sequence_length)

    # sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
    # path_to_vec = '../glove/glove.6B.100d.txt'
    # embedded_sequences = amplitude_embedding_layer(path_to_vec, 10)
    # output = embedded_sequences(sequence_input)
    # model = Model(sequence_input, output)
    # model.compile(loss='categorical_crossentropy',
    #               optimizer='rmsprop',
    #               metrics=['acc'])
    # model.summary()

    # Sanity check: run the untrained embedding model on the training inputs.
    x = train_x
    y = model.predict(x)
    print(y)
    print(y.shape)
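# A minimal sketch of what phase_embedding_layer might be: a trainable
# Embedding whose weights are read as word phases. This is an assumption for
# illustration, not the repository's actual implementation.
from keras.layers import Embedding


def phase_embedding_layer_sketch(max_sequence_length, vocab_size,
                                 embedding_dim=100):
    return Embedding(vocab_size,
                     embedding_dim,
                     input_length=max_sequence_length,
                     trainable=True)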
def init(attr=2,
         train_size=0.7,
         test_size=0.1,
         batch_size=25,
         trainable_embed=False,
         filename=None):
    """Load the test generator and a trained model for evaluation.

    train_set_x has shape (45, 312, 153, 300).
    """
    # Data generator: load the first mini-batch.
    revs, W, W2, word_idx_map, vocab, mairesse, charged_words = load_data(attr)
    datasets = w2idx(revs, word_idx_map, mairesse, charged_words, attr,
                     max_l=149, max_s=312, filter_h=3)
    _D = len(datasets[0])        # number of documents
    _S = len(datasets[0][0])     # sentences per document
    _W = len(datasets[0][0][0])  # words per sentence (after filter padding)
    _E = W.shape[1]              # embedding dimension

    dataset_idx = data_idx(len(datasets[0]), batch_size)
    # print(len(datasets[0]))  # 2467

    # Split indices into train/test/val ranges.
    n_train_items = int(np.round(train_size * _D))
    n_test_items = int(test_size * _D)
    test_idx = dataset_idx[n_train_items:n_train_items + n_test_items]
    val_idx = dataset_idx[n_train_items + n_test_items:]

    test_generator = data_gen(attr, test_idx, datasets, W,
                              batch_size=batch_size, test=True)

    if filename is None:
        # Evaluation requires a saved checkpoint.
        raise ValueError('a saved model filename is required')
    model = load_model(filename, custom_objects={'nll1': nll1})
    return model, test_generator
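# A hypothetical evaluation driver for init() above; the checkpoint path is a
# placeholder, and the ([x, mairesse], y) batch structure is assumed from
# data_gen's test mode.
model, test_generator = init(attr=2, filename='checkpoint/model.h5')
[x_batch, m_batch], y_batch = next(test_generator)
print(model.test_on_batch([x_batch, m_batch], y_batch))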
def init(attr=2,
         train_size=0.7,
         test_size=0.1,
         batch_size=25,
         trainable_embed=False,
         filename=None):
    """Build (or load) the model plus train/val/test generators.

    train_set_x has shape (45, 312, 153, 300).
    """
    # Data generator: load the first mini-batch.
    revs, W, W2, word_idx_map, vocab, mairesse, charged_words = load_data(
        attr, data_aug=True)
    datasets = w2idx(revs, word_idx_map, mairesse, charged_words, attr,
                     max_l=149, max_s=312, filter_h=3)
    _D = len(datasets[0])        # number of documents
    _S = len(datasets[0][0])     # sentences per document
    _W = len(datasets[0][0][0])  # words per sentence (after filter padding)
    _E = W.shape[1]              # embedding dimension

    dataset_idx = data_idx(len(datasets[0]), batch_size)
    # print(len(datasets[0]))  # 2467

    # Split indices into train/test/val ranges.
    n_train_items = int(np.round(train_size * _D))
    train_idx = dataset_idx[:n_train_items]
    n_test_items = int(test_size * _D)
    test_idx = dataset_idx[n_train_items:n_train_items + n_test_items]
    val_idx = dataset_idx[n_train_items + n_test_items:]

    train_generator = data_gen(attr, train_idx, datasets, W,
                               batch_size=batch_size)
    val_generator = data_gen(attr, val_idx, datasets, W,
                             batch_size=batch_size)
    test_generator = data_gen(attr, test_idx, datasets, W,
                              batch_size=batch_size)

    # Convolution geometry: one filter shape and pool size per filter height.
    input_shape = (_S * _W, _E, 1)
    docs_size = _S
    hidden_units = [200, 200, 2]
    filter_hs = [1, 2, 3]
    filter_shapes = []
    pool_sizes = []
    reshape = (_S, _W)
    for filter_h in filter_hs:
        filter_shapes.append((filter_h, _E))
        pool_sizes.append((_S * (_W - filter_h + 1), 1))

    if filename is None:
        model = BigFiveCnnModel(W,
                                filter_shapes,
                                pool_sizes,
                                reshape,
                                filter_hs=filter_hs,
                                hidden_units=hidden_units,
                                docs_size=docs_size,
                                trainable_embed=trainable_embed)
        # model.summary()
        opt = Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
        model.compile(loss=nll1, optimizer=opt, metrics=['accuracy'])
    else:
        model = load_model(filename, custom_objects={'nll1': nll1})

    steps = int(train_idx.shape[0] // batch_size)
    v_steps = int(val_idx.shape[0] // batch_size)
    return model, train_generator, val_generator, test_generator, steps, v_steps
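# A hypothetical training driver for init() above; the epoch count is a
# placeholder. fit_generator matches the Keras API era implied by
# Adadelta(lr=...).
model, train_gen, val_gen, test_gen, steps, v_steps = init(attr=2)
model.fit_generator(train_gen,
                    steps_per_epoch=steps,
                    epochs=10,
                    validation_data=val_gen,
                    validation_steps=v_steps)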
# print(embedding_params['word2id'])
lookup_table = get_lookup_table(embedding_params)
max_sequence_length = reader.max_sentence_length
random_init = (params.wordvec_initialization == 'random')

train_test_val = reader.create_batch(embedding_params=embedding_params,
                                     batch_size=-1)
training_data = train_test_val['train']
test_data = train_test_val['test']
validation_data = train_test_val['dev']

train_x, train_y = data_gen(training_data, max_sequence_length)
test_x, test_y = data_gen(test_data, max_sequence_length)
val_x, val_y = data_gen(validation_data, max_sequence_length)

train_y = to_categorical(train_y)
test_y = to_categorical(test_y)
val_y = to_categorical(val_y)


def run_task(zipped_args):
    # Unpack the task index and one hyper-parameter combination.
    i, (dropout_rate, optimizer, learning_rate, init_mode, projection,
        batch_size, activation) = zipped_args
    arg_str = " ".join([
        str(ii) for ii in (dropout_rate, optimizer, learning_rate, init_mode,
                           projection, batch_size, activation)
    ])
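# A hypothetical driver that enumerates a hyper-parameter grid and hands each
# indexed combination to run_task; every candidate value is a placeholder.
import itertools

search_space = itertools.product(
    [0.1, 0.5],             # dropout_rate
    ['rmsprop', 'adam'],    # optimizer
    [1e-3],                 # learning_rate
    ['glorot_uniform'],     # init_mode
    [True, False],          # projection
    [16, 32],               # batch_size
    ['relu'])               # activation
for zipped_args in enumerate(search_space):
    run_task(zipped_args)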
def complex_embedding(params):
    # datasets_dir, dataset_name, wordvec_initialization ('random' |
    # 'word2vec' | 'orthogonalize'), wordvec_path, loss, optimizer,
    # batch_size and epochs all arrive through params.
    reader = data_reader_initialize(params.dataset_name, params.datasets_dir)

    if params.wordvec_initialization == 'orthogonalize':
        embedding_params = reader.get_word_embedding(params.wordvec_path,
                                                     orthonormalized=True)
    elif params.wordvec_initialization in ('random', 'word2vec'):
        embedding_params = reader.get_word_embedding(params.wordvec_path,
                                                     orthonormalized=False)
    else:
        raise ValueError('The input word initialization approach is invalid!')

    # print(embedding_params['word2id'])
    lookup_table = get_lookup_table(embedding_params)
    max_sequence_length = reader.max_sentence_length
    random_init = (params.wordvec_initialization == 'random')

    if params.network_type == 'complex_superposition':
        model = run_complex_embedding_network_superposition(
            lookup_table,
            max_sequence_length,
            reader.nb_classes,
            random_init=random_init)
    elif params.network_type == 'complex_mixture':
        model = run_complex_embedding_network_mixture(
            lookup_table,
            max_sequence_length,
            reader.nb_classes,
            random_init=random_init)
    else:
        model = run_real_embedding_network(lookup_table,
                                           max_sequence_length,
                                           reader.nb_classes,
                                           random_init=random_init)

    model.compile(loss=params.loss,
                  optimizer=params.optimizer,
                  metrics=['accuracy'])
    model.summary()
    weights = model.get_weights()

    train_test_val = reader.create_batch(embedding_params=embedding_params,
                                         batch_size=-1)
    training_data = train_test_val['train']
    test_data = train_test_val['test']
    validation_data = train_test_val['dev']

    # for x, y in batch_gen(training_data, max_sequence_length):
    #     model.train_on_batch(x, y)
    train_x, train_y = data_gen(training_data, max_sequence_length)
    test_x, test_y = data_gen(test_data, max_sequence_length)
    val_x, val_y = data_gen(validation_data, max_sequence_length)

    print(len(train_x))
    print(len(test_x))
    print(len(val_x))
    # assert len(train_x) == 67349
    # assert len(test_x) == 1821
    # assert len(val_x) == 872

    train_y = to_categorical(train_y)
    test_y = to_categorical(test_y)
    val_y = to_categorical(val_y)

    history = model.fit(x=train_x,
                        y=train_y,
                        batch_size=params.batch_size,
                        epochs=params.epochs,
                        validation_data=(test_x, test_y))
    val_acc = history.history['val_acc']
    train_acc = history.history['acc']

    if not os.path.exists(params.eval_dir):
        os.mkdir(params.eval_dir)

    # Plot and save the learning curve.
    learning_curve_path = os.path.join(params.eval_dir, 'learning_curve')
    epoch_indexes = [x + 1 for x in range(len(val_acc))]
    line_1, = plt.plot(epoch_indexes, val_acc)
    line_2, = plt.plot(epoch_indexes, train_acc)
    # plt.axis([0, 6, 0, 20])
    plt.legend([line_1, line_2], ['test_acc', 'train_acc'])
    fig = plt.gcf()
    fig.savefig(learning_curve_path, dpi=fig.dpi)

    # Evaluate on the test set and record the scores.
    evaluation = model.evaluate(x=test_x, y=test_y)
    eval_file_path = os.path.join(params.eval_dir, 'eval.txt')
    with open(eval_file_path, 'w') as eval_file:
        eval_file.write('acc: {}, loss: {}'.format(evaluation[1],
                                                   evaluation[0]))

    # Save the learned embeddings and the trained model.
    embedding_dir = os.path.join(params.eval_dir, 'embedding')
    if not os.path.exists(embedding_dir):
        os.mkdir(embedding_dir)
    np.save(os.path.join(embedding_dir, 'phase_embedding'),
            model.get_weights()[0])
    np.save(os.path.join(embedding_dir, 'amplitude_embedding'),
            model.get_weights()[1])
    np.save(os.path.join(embedding_dir, 'word2id'),
            embedding_params['word2id'])
    save_model(model, os.path.join(params.eval_dir, 'model'))

    experiment_results_path = 'eval/experiment_result.xlsx'
    # Record the best validation accuracy in the shared result sheet; the
    # sheet's default integer index is assumed.
    df1 = pd.read_excel(experiment_results_path, sheet_name='Sheet1')
    row = {'complex_mixture': 0, 'complex_superposition': 1, 'real': 2}
    df1.loc[row[params.network_type], params.dataset_name] = max(val_acc)
    df1.to_excel(experiment_results_path)
train_size = 0.9
attr = 2
batch_size = 25

revs, W, W2, word_idx_map, vocab, mairesse, charged_words = load_data(attr)
datasets = w2idx(revs, word_idx_map, mairesse, charged_words, attr,
                 max_l=149, max_s=312, filter_h=3)
_D = len(datasets[0])        # number of documents
_S = len(datasets[0][0])     # sentences per document
_W = len(datasets[0][0][0])  # words per sentence (after filter padding)
_E = W.shape[1]              # embedding dimension
print(_D)
print(_S)
print(_W)
print(_E)

dataset_idx = data_idx(attr, len(datasets[0]), batch_size)

# Split off the training indices and pull one mini-batch from the generator.
n_train_items = int(np.round(train_size * _D))
train_idx = dataset_idx[:n_train_items]
train_generator = data_gen(attr, train_idx, datasets, W, batch_size=batch_size)
[train_set_x, train_set_m], train_set_y = next(train_generator)
print(train_set_x.shape)
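# A quick sanity check on the mini-batch fetched above: the document tensor,
# the mairesse feature matrix, and the labels should agree on batch size
# (the ([x, m], y) structure follows data_gen's yield above).
assert train_set_x.shape[0] == train_set_m.shape[0] == len(train_set_y)
print(train_set_m.shape)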