print('window_size = ', window_size)
print('num_walks_per_node = ', num_walks_per_node)

# Load the config file (safe_load avoids the deprecated default Loader)
cfg = yaml.safe_load(open('config.yml', 'r'))

#########################################
#          Set up random seeds          #
#########################################
random.seed(252)
np.random.seed(252)

#########################################
#            Load graph data            #
#########################################
num_nodes, num_edges, node_list, edges, node_type, type_idx = data_helpers.load_dataset(
    cfg['path_data'], dataset)

mp_idx = gen_mp_candidates(edges, node_type, 10, window_size)
print('{} different metapaths.'.format(len(mp_idx)), file=sys.stderr)
# print(mp_idx)
num_metapaths = len(mp_idx) + 1
types = ['v', 'a', 'i', 'f']

#########################################
#              MARU walks               #
#########################################
out_file = cfg['path_walks'] + 'maru_walks.{}.L{}.W{}.S{}'.format(
    dataset, walk_len, window_size, num_walks_per_node)

with open(out_file, 'w') as wp:
    for walk_itr in range(num_walks_per_node):
        print('Iteration #{}'.format(walk_itr), file=sys.stderr)
import numpy as np
import data_helpers as dh
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import datetime as dt

X, Y, files = dh.load_dataset('shared/Digits_1f1', (32, 32), 1)

# Parameters
learning_rate = 0.001
batch_size = 64
training_iters = 500
display_step = 50

# Network Parameters
n_input = 32 * 32
n_classes = 10
# dropout = 0.75  # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)


# Create model
def conv2d(img, w, b):
    return tf.nn.relu(tf.nn.bias_add(
        tf.nn.conv2d(img, w, strides=[1, 1, 1, 1], padding='SAME'), b))
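
# A hedged illustration (not part of the original script) of how the conv2d
# helper above could be combined with max-pooling, dropout, and a fully
# connected readout for the flattened 32x32 inputs. The max_pool helper and
# all weight/bias names ('wc1', 'bc1', 'out') are assumptions introduced here
# purely for illustration.
def max_pool(img, k):
    return tf.nn.max_pool(img, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')


def conv_net(_X, weights, biases, _dropout):
    # Reshape the flat input back into a 32x32 single-channel image
    _X = tf.reshape(_X, shape=[-1, 32, 32, 1])
    # Convolution + ReLU (via conv2d), then 2x2 max-pooling and dropout
    conv1 = max_pool(conv2d(_X, weights['wc1'], biases['bc1']), k=2)
    conv1 = tf.nn.dropout(conv1, _dropout)
    # Flatten and apply a fully connected output layer
    fc1 = tf.reshape(conv1, [-1, weights['out'].get_shape().as_list()[0]])
    return tf.add(tf.matmul(fc1, weights['out']), biases['out'])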
#########################################
#    Load arguments and config file     #
#########################################
# Process command-line arguments
FLAGS = handle_flags()
random.seed(FLAGS.random_seed)
np.random.seed(FLAGS.random_seed)
tf.set_random_seed(FLAGS.random_seed)

file_path = "../../data/"
train_index_list = list(map(int, FLAGS.train_data_set.split(',')))
test_index_list = list(map(int, FLAGS.test_data_set.split(',')))

fea_train, lbl_train, fea_valid, lbl_valid, fea_test, lbl_test = data_helpers.load_dataset(
    file_path, train_index_list, test_index_list,
    FLAGS.start_time, FLAGS.sequence_length)

print("finished loading data set...")
print("fea_train " + str(fea_train.shape))
print("lbl_train " + str(lbl_train.shape))
print("fea_valid " + str(fea_valid.shape))
print("lbl_valid " + str(lbl_valid.shape))
print("fea_test " + str(fea_test.shape))
print("lbl_test " + str(lbl_test.shape))

with tf.Graph().as_default():
    # Set up session configuration
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    seed = 1
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    epochs = 20
    em_types = ['glove', 'word2vec', 'fasttext']
    databases = ["MR", "SST-1", "SST-2", "SUBJ", "TREC", "ProcCons", "IMDB"]
    optimizers = ['adam', 'adagrad']
    schedules = ['ReduceLROnPlateau', 'StepLR']

    save_dir = './multi'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Train
    for em in em_types:
        print('EM {}'.format(em))
        print('*' * 50)
        for d in databases:
            print(d)
            dir = save_dir + '/' + d
            if not os.path.exists(dir):
                os.makedirs(dir)

            # Load data
            train_loader, dev_loader, test_loader, num_class = load_dataset(d, 64, em)

            A, B, C, D = 64, 8, 16, 16
            # NOTE: this single model instance is reused for every
            # optimizer/scheduler combination below; move this line inside the
            # inner loops if each configuration should start from freshly
            # initialized weights.
            model = capsules(A=A, B=B, C=C, D=D, E=num_class, iters=2).to(device)

            for o in optimizers:
                for s in schedules:
                    folder = dir + "/em=" + em + ",o=" + o + ",s=" + s
                    if not os.path.exists(folder):
                        os.makedirs(folder)

                    criterion = SpreadLoss(num_class=num_class, m_min=0.2, m_max=0.9)
                    if o == 'adam':
                        optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
                    elif o == 'adagrad':
                        optimizer = optim.Adagrad(model.parameters(), lr=0.01)

                    if s == 'ReduceLROnPlateau':
                        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                            optimizer, 'max', patience=1)
                    elif s == 'StepLR':
                        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

                    out_acc = open(folder + "/acc.csv", "w")
                    out_loss = open(folder + "/loss.csv", "w")
                    out_acc.write('epoch,phase,acc\n')
                    out_loss.write('epoch,phase,loss\n')

                    for epoch in range(1, epochs + 1):
                        if s == 'StepLR':
                            scheduler.step()
                        torch.cuda.empty_cache()
                        print('Epoch {}/{}'.format(epoch, epochs))
                        print('-' * 30)

                        train_acc, train_loss = train(train_loader, model, criterion,
                                                      optimizer, epoch, device)
                        out_acc.write('{},{},{:.4f}\n'.format(epoch, 'train', train_acc))
                        out_loss.write('{},{},{:.4f}\n'.format(epoch, 'train', train_loss))

                        dev_acc, dev_loss = test(dev_loader, model, criterion, 'dev', device)
                        out_acc.write('{},{},{:.4f}\n'.format(epoch, 'dev', dev_acc))
                        out_loss.write('{},{},{:.4f}\n'.format(epoch, 'dev', dev_loss))

                        if s == 'ReduceLROnPlateau':
                            scheduler.step(train_acc)

                    out_acc.close()
                    out_loss.close()
                    save_plot_to_file(folder + "/acc.csv", 'acc', folder + "/acc.png", epochs)
                    save_plot_to_file(folder + "/loss.csv", 'loss', folder + "/loss.png", epochs)

                    test_acc, test_loss = test(test_loader, model, criterion, 'TEST', device)
                    out_test = open(folder + "/test.txt", "w")
                    out_test.write('Accuracy: {:.6f}, Loss: {:.6f}\n'.format(test_acc, test_loss))
                    out_test.close()

                    snapshot(model, folder, epochs)
resize_height = 256
resize_width = 256
cropped_height = 224
cropped_width = 224
no_classes = 200
INITIAL_LR = 1e-3
weight_decay_constant = 5e-4
train_count = 4794
val_count = 1199
DNN_BEST_MODEL = 'ft_all_layer.hdf5'
init_weights_path = 'ft_last_layer.hdf5'
EPOCHS_PATIENCE_BEFORE_STOPPING = 5
EPOCHS_PATIENCE_BEFORE_DECAY = 2

train_generator = load_dataset(train_file_paths, train_labels, batch_size, no_classes,
                               resize_height, resize_width, cropped_height, cropped_width)
val_generator = load_dataset(val_file_paths, val_labels, batch_size, no_classes,
                             resize_height, resize_width, cropped_height, cropped_width)

# Set batches of training and validation required
train_batches = int(np.ceil(train_count / batch_size))
val_batches = int(np.ceil(val_count / batch_size))

cbcnn_model = vgg_16_cbcnn(input_shape=(cropped_height, cropped_width, 3),
                           no_classes=no_classes,
                           bilinear_output_dim=8192,
                           sum_pool=True,
                           weight_decay_constant=weight_decay_constant,
                           multi_label=False,
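
# Hedged sketch (not from the original script) of how the constants above
# could be wired into training once the vgg_16_cbcnn(...) call is completed.
# The optimizer, monitored metric, and epoch count are assumptions; only the
# names INITIAL_LR, DNN_BEST_MODEL, EPOCHS_PATIENCE_BEFORE_STOPPING, and
# EPOCHS_PATIENCE_BEFORE_DECAY come from the snippet itself.
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.optimizers import SGD

cbcnn_model.compile(optimizer=SGD(lr=INITIAL_LR, momentum=0.9),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

callbacks = [
    ModelCheckpoint(DNN_BEST_MODEL, monitor='val_loss', save_best_only=True),
    EarlyStopping(monitor='val_loss', patience=EPOCHS_PATIENCE_BEFORE_STOPPING),
    ReduceLROnPlateau(monitor='val_loss', patience=EPOCHS_PATIENCE_BEFORE_DECAY, factor=0.1),
]

cbcnn_model.fit_generator(train_generator,
                          steps_per_epoch=train_batches,
                          validation_data=val_generator,
                          validation_steps=val_batches,
                          epochs=50,  # assumed; not specified in the snippet
                          callbacks=callbacks)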
"Log placement of ops on devices") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("\nParameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr.upper(), value)) print("") word2id = data_helpers.load_vocab(FLAGS.vocab_file) print('vocabulary size: {}'.format(len(word2id))) response_data = data_helpers.load_responses(FLAGS.response_file, word2id, FLAGS.max_response_len) test_dataset = data_helpers.load_dataset(FLAGS.test_file, word2id, FLAGS.max_utter_len, FLAGS.max_utter_num, response_data) print('test_pairs: {}'.format(len(test_dataset))) target_loss_weight = [1.0, 1.0] print("\nEvaluating...\n") checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) print(checkpoint_file) graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf)
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
print("Loading data...")
vocab = data_helpers.load_vocab(FLAGS.vocab_file)
print('vocabulary size: {}'.format(len(vocab)))
charVocab = data_helpers.load_char_vocab(FLAGS.char_vocab_file)
print('charVocab size: {}'.format(len(charVocab)))

response_data = data_helpers.load_responses(FLAGS.response_file, vocab, FLAGS.max_response_len)
print('response_data size: {}'.format(len(response_data)))

train_dataset = data_helpers.load_dataset(FLAGS.train_file, vocab, FLAGS.max_utter_len,
                                          FLAGS.max_utter_num, response_data)
print('train_pairs: {}'.format(len(train_dataset)))
valid_dataset = data_helpers.load_dataset(FLAGS.valid_file, vocab, FLAGS.max_utter_len,
                                          FLAGS.max_utter_num, response_data)
print('valid_pairs: {}'.format(len(valid_dataset)))

target_loss_weight = [1.0, 1.0]

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        u2u_imn = U2U_IMN(
            max_utter_len=FLAGS.max_utter_len,
            max_utter_num=FLAGS.max_utter_num,
import numpy as np
import data_helpers as dh
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import datetime as dt

# NOTE: each load below overwrites the previous Xdata, Y, files; keep only the
# dataset you intend to use, or bind the results to separate names.
img_w = 56
img_h = 32
digits = 2
Xdata, Y, files = dh.load_dataset('shared/Digits_2', (img_w, img_h), digits)

img_w = 104
img_h = 32
digits = 4
Xdata, Y, files = dh.load_dataset('shared/Digits_4', (img_w, img_h), digits)

img_w = 160
img_h = 32
digits = 6
Xdata, Y, files = dh.load_dataset('shared/Digits_6f3', (img_w, img_h), digits)

# Invert and normalize to [0, 1]
# X = (255 - Xdata) / 255.0

# Standardization: compute the mean across the rows (sum the elements of each
# column and divide by the number of rows).
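
# The snippet above leaves both preprocessing steps commented out. A minimal,
# hedged sketch of what they would look like (an assumption about intent, not
# part of the original script): invert/scale to [0, 1], then zero-center each
# pixel column and divide by its standard deviation across the images.
X = (255 - Xdata) / 255.0          # invert and normalize to [0, 1]
col_mean = X.mean(axis=0)          # per-pixel mean across images (rows)
col_std = X.std(axis=0) + 1e-8     # small epsilon avoids division by zero
X = (X - col_mean) / col_std       # standardized features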
def main():
    em_type = 'glove'
    database = 'IMDB'
    folder = database
    if not os.path.exists(database):
        os.makedirs(database)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    seed = 1
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Load data
    train_loader, dev_loader, test_loader, num_class = load_dataset(database, 64, em_type)

    A, B, C, D = 64, 8, 16, 16
    # A, B, C, D = 32, 32, 32, 32
    model = capsules(A=A, B=B, C=C, D=D, E=num_class, iters=2).to(device)

    # Save a description of the model to file
    model_file = open(folder + "/model.txt", "w")
    model_file.write('Model:\n{}\n'.format(model))
    model_file.write('Total number of parameters: {}\n'.format(
        sum(p.numel() for p in model.parameters())))
    model_file.write('Total number of trainable parameters: {}\n'.format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))
    model_file.close()

    criterion = SpreadLoss(num_class=num_class, m_min=0.2, m_max=0.9)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=1)

    out_acc = open(folder + "/acc.csv", "w")
    out_loss = open(folder + "/loss.csv", "w")
    out_acc.write('epoch,phase,acc\n')
    out_loss.write('epoch,phase,loss\n')

    epochs = 2
    for epoch in range(1, epochs + 1):
        torch.cuda.empty_cache()
        print('Epoch {}/{}'.format(epoch, epochs))
        print('-' * 30)

        train_acc, train_loss = train(train_loader, model, criterion, optimizer, epoch, device)
        out_acc.write('{},{},{:.4f}\n'.format(epoch, 'train', train_acc))
        out_loss.write('{},{},{:.4f}\n'.format(epoch, 'train', train_loss))

        # Evaluate on the held-out dev split after each epoch
        dev_acc, dev_loss = test(dev_loader, model, criterion, 'dev', device)
        out_acc.write('{},{},{:.4f}\n'.format(epoch, 'dev', dev_acc))
        out_loss.write('{},{},{:.4f}\n'.format(epoch, 'dev', dev_loss))

        scheduler.step(train_acc)

    out_acc.close()
    out_loss.close()
    save_plot_to_file(folder + "/acc.csv", 'acc', folder + "/acc.png", epochs)
    save_plot_to_file(folder + "/loss.csv", 'loss', folder + "/loss.png", epochs)

    test_acc, test_loss = test(test_loader, model, criterion, 'TEST', device)
    out_test = open(folder + "/test.txt", "w")
    out_test.write('Accuracy: {:.6f}, Loss: {:.6f}\n'.format(test_acc, test_loss))
    out_test.close()

    snapshot(model, database, epochs)
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
print("Loading data...")
word2id = data_helpers.load_vocab(FLAGS.vocab_file)
print('vocabulary size: {}'.format(len(word2id)))
response_data = data_helpers.load_responses(FLAGS.response_file, word2id, FLAGS.max_response_len)

# One line in the dataset = a multi-turn dialogue context + a label
# (0: irrelevant, 1: relevant) + the response context.
train_dataset = data_helpers.load_dataset(FLAGS.train_file, word2id, FLAGS.max_utter_len,
                                          FLAGS.max_utter_num, response_data)
print('train_pairs: {}'.format(len(train_dataset)))
valid_dataset = data_helpers.load_dataset(FLAGS.valid_file, word2id, FLAGS.max_utter_len,
                                          FLAGS.max_utter_num, response_data)  # *varied-length*
print('valid_pairs: {}'.format(len(valid_dataset)))
test_dataset = data_helpers.load_dataset(FLAGS.test_file, word2id, FLAGS.max_utter_len,
                                         FLAGS.max_utter_num, response_data)
print('test_pairs: {}'.format(len(test_dataset)))

target_loss_weight = [1.0, 1.0]

with tf.Graph().as_default():
with h5py.File(model_file_path, mode='r') as f:
    topology.load_weights_from_hdf5_group(f['model_weights'], cbcnn_model.layers)

# Get necessary data from raw csvs (.values replaces the deprecated .as_matrix())
test_mat = pd.read_csv(test_file_name, sep=' ').values
test_filenames = test_mat[:, 0]
test_labels = test_mat[:, 1]
test_count = len(test_filenames)

# Get test data set generator
test_file_paths = [
    os.path.join(base_dir, test_file) for test_file in test_filenames
]
test_generator = load_dataset(test_file_paths, test_labels, batch_size, no_classes,
                              resize_height, resize_width, cropped_height, cropped_width)

# Get test predictions
cnt = 0
test_batches = int(np.ceil(test_count / batch_size))
actual_labels = []
predicted_labels = []
for test_batch_tuple in test_generator:
    if cnt == test_batches:
        break
    test_data = test_batch_tuple[0]
    test_labels = test_batch_tuple[1]
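    # Hedged sketch of how the loop body might continue (not from the original
    # script): run the batch through the model, take the argmax class, and
    # accumulate true vs. predicted labels. This assumes the generator yields
    # one-hot label batches and that cbcnn_model exposes the standard Keras
    # predict() method.
    batch_probs = cbcnn_model.predict(test_data)
    predicted_labels.extend(np.argmax(batch_probs, axis=1).tolist())
    actual_labels.extend(np.argmax(test_labels, axis=1).tolist())
    cnt += 1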