def __obtain_tokens(self, filename):
    """
    Open the selected file and return a list of token samples for it
    using the tokenize library.

    :param filename: path to the file to open, relative to ``self.root_dir``.
    :return: list containing one list of preprocessed tokens, or an
        empty list if the file could not be read.
    """
    sample = []
    try:
        # Use a context manager so the file handle is always closed;
        # the original opened the file and never closed it (leak).
        with open(os.path.join(self.root_dir, filename), 'r',
                  encoding='utf-8') as current_file:
            tokens = tokenize.generate_tokens(current_file.readline)
            # Don't process comments, block comments (triple-quoted
            # strings) or empty tokens; DEDENT tokens are kept even
            # though their string value is empty.
            processed_tokens = [
                du.preprocess(t_type, t_val)
                for t_type, t_val, _, _, _ in tokens
                if t_type != tokenize.COMMENT
                and not t_val.startswith("'''")
                and not t_val.startswith('"""')
                and (t_type == tokenize.DEDENT or t_val != "")
            ]
        if processed_tokens:
            sample.append(processed_tokens)
    except OSError:
        # Unreadable files are skipped deliberately (best-effort scan).
        pass
    return sample
def load_data():
    """
    Load the grenade training data for both maps and return features/labels.

    :return: tuple ``(X, y)`` as produced by ``preprocess``.
    """
    mirage_csv_file = "C:/CS-GO-Grenade-Classification/project/data/train-grenades-de_mirage.csv"
    inferno_csv_file = "C:/CS-GO-Grenade-Classification/project/data/train-grenades-de_inferno.csv"
    # Bug fix: the original read the mirage CSV into `inferno` and the
    # inferno CSV into `mirage`. Each frame now matches its file; the
    # concat order below preserves the original row order (mirage data
    # first, then inferno data).
    mirage = pd.read_csv(mirage_csv_file, index_col=0)
    inferno = pd.read_csv(inferno_csv_file, index_col=0)
    raw_data = pd.concat([mirage, inferno])
    X, y = preprocess(raw_data)
    return X, y
def build_gallery(sess, input_node, output_node, feature_code, image_paths, gallery_data_dir):
    """
    Encode every gallery image into a feature vector and save the result.

    :param sess: tf.Session used to run the model.
    :param input_node: input placeholder fed with a preprocessed image.
    :param output_node: output tensor producing the embedding.
    :param feature_code: selects the SCDA encoding variant (1-4).
    :param image_paths: list of paths to all gallery images.
    :param gallery_data_dir: directory where the stacked features are
        written as ``gallery_features.npy``.
    :return: np.ndarray of L2-normalised features, one entry per image.
    """
    print('Start building gallery...')
    assert os.path.isdir(gallery_data_dir), 'dir: {} cannot find'.format(gallery_data_dir)
    total = len(image_paths)
    features = []
    for idx, path in enumerate(image_paths):
        print('{}/{}'.format(idx + 1, total))
        image_batch = data_utils.preprocess(path, feature_code)
        embedding = sess.run(output_node, feed_dict={input_node: image_batch})
        # Pick the SCDA encoding variant requested by feature_code.
        if feature_code == 1:
            vec, _ = scda_utils.scda(embedding)
        elif feature_code == 2:
            vec = scda_utils.scda_flip(embedding)
        elif feature_code == 3:
            vec = scda_utils.scda_plus(embedding)
        else:
            vec = scda_utils.scda_flip_plus(embedding)
        # L2-normalise so retrieval can rank with plain dot products.
        vec /= np.linalg.norm(vec, keepdims=True)
        features.append(vec)
    # Persist the stacked feature matrix next to the gallery data.
    feature_array = np.array(features)
    np.save(os.path.join(gallery_data_dir, 'gallery_features.npy'), feature_array)
    print('Finish building gallery!')
    return feature_array
def test(test_dir, res_dir, model_h5):
    """
    Run SVAM-Net on every image in ``test_dir`` and save saliency outputs.

    :param test_dir: directory containing the input images.
    :param res_dir: output directory (created if missing).
    :param model_h5: path to the h5 weight file; asserted to exist.
    """
    ## create dir for output test data
    if not exists(res_dir):
        os.makedirs(res_dir)
    test_paths = sorted(glob(join(test_dir, "*.*")))
    print("{0} test images are loaded".format(len(test_paths)))
    ## load specific model
    assert os.path.exists(model_h5), "h5 model not found"
    model = SVAM_Net(res=im_shape)
    model.load_weights(model_h5)
    # testing loop (removed unused outer `s = time.time()`)
    times = []
    for img_path in test_paths:
        # prepare data
        img_name = ntpath.basename(img_path).split('.')[0]
        inp_img = np.array(Image.open(img_path).resize(im_res))
        im = np.expand_dims(preprocess(inp_img), axis=0)
        # generate saliency
        t0 = time.time()
        saml, sambu, samd, out = model.predict(im)
        times.append(time.time() - t0)
        _, out_bu, _, out_tdr = deprocess_gens(saml, sambu, samd, out, im_res)
        print("tested: {0}".format(img_path))
        Image.fromarray(inp_img).save(join(res_dir, img_name + ".jpg"))
        Image.fromarray(out_bu).save(join(res_dir, img_name + "_bu.png"))
        Image.fromarray(out_tdr).save(join(res_dir, img_name + "_tdr.png"))
    # some statistics
    num_test = len(test_paths)
    if num_test == 0:
        print("\nFound no images for test")
    else:
        print("\nTotal images: {0}".format(num_test))
        # Bug fix: with exactly one image, times[1:] is empty and the
        # original produced a nan mean and a bad division; only report
        # throughput when at least two frames were timed (the first
        # frame is excluded as warm-up/bootstrap).
        if len(times) > 1:
            Ttime, Mtime = np.sum(times[1:]), np.mean(times[1:])
            print("Time taken: {0} sec at {1} fps".format(Ttime, 1. / Mtime))
        # Fixed duplicated word in the original message ("in in").
        print("\nSaved generated images in {0}\n".format(res_dir))
def testing(nn_model, nb_photon, nb_epoch, lr, batch_size, GPU=False):
    """
    Train ``nn_model`` for ``nb_epoch`` epochs and track its metrics.

    :param nn_model: a PyTorch model.
    :param nb_photon: dataset selector forwarded to ``preprocess``.
    :param nb_epoch: number of training epochs.
    :param lr: learning rate for the Adam optimizer.
    :param batch_size: mini-batch size for the data loaders.
    :param GPU: boolean flag enabling CUDA features in train/get_accuracy.
    :return: (best validation precision, accuracy loader, training losses,
        validation losses, per-epoch validation precisions).
    """
    optimizer = optim.Adam(nn_model.parameters(), lr=lr, weight_decay=0.05)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    train_loader, validation_loader, accuracy_loader = preprocess(nb_photon, batch_size)

    best_precision = 0
    training_losses, validation_losses, accuracies = [], [], []

    for epoch in tqdm(range(1, nb_epoch + 1)):
        # One optimisation pass over the training set.
        nn_model = train(nn_model, train_loader, optimizer, GPU)

        # Metrics on the training set.
        train_precision, loss_training = get_accuracy(nn_model, train_loader, GPU)
        training_losses.append(loss_training)

        # Metrics on the validation set.
        precision, loss_validation = get_accuracy(nn_model, validation_loader, GPU)
        validation_losses.append(loss_validation)
        accuracies.append(precision)
        best_precision = max(best_precision, precision)

        # Let the scheduler react to validation-loss plateaus.
        scheduler.step(loss_validation)

    return best_precision, accuracy_loader, training_losses, validation_losses, accuracies
# NOTE(review): chunk of a test script; `generator`, `model_name`,
# `test_paths`, `preprocess`, `deprocess` and `misc` come from outside
# this view, and the trailing `else:` body continues in the next chunk.
generator.load_weights(model_h5)
print("\nLoaded data and model")
## create dir for output test data
samples_dir = os.path.join("data/output/8x/", model_name)
if not os.path.exists(samples_dir):
    os.makedirs(samples_dir)
# testing loop
times = []
s = time.time()  # NOTE(review): overwritten inside the loop; unused here
for img_path in test_paths:
    # prepare data
    img_name = ntpath.basename(img_path).split('.')[0]
    img_lr = misc.imread(img_path, mode='RGB').astype(np.float)
    img_lr = misc.imresize(img_lr, (60, 80))
    im = preprocess(img_lr)
    im = np.expand_dims(im, axis=0)
    # generate enhanced image
    s = time.time()
    gen = generator.predict(im)
    gen = deprocess(gen)  # Rescale to 0-1
    tot = time.time() - s
    times.append(tot)
    # save sample images
    misc.imsave(os.path.join(samples_dir, img_name + '_gen.png'), gen[0])
# some statistics
num_test = len(test_paths)
if (num_test == 0):
    print("\nFound no images for test")
else:
# NOTE(review): adversarial training chunk; `step`, `TOTAL_STEP`,
# `n_critic`, `sess`, the placeholders and the path arrays come from
# outside this view, and the chunk is cut off mid-statement at the end —
# the rest of the generator batch loop is in the next chunk.
while step < TOTAL_STEP:
    # pick random images every time for D
    for itr in range(n_critic):
        idx = np.random.choice(np.arange(num_train), BATCH_SIZE, replace=False)
        batchA_paths = trainA_paths[idx]
        batchB_paths = trainB_paths[idx]
        batchA_images = np.empty((BATCH_SIZE, 256, 256, 3), dtype=np.float32)
        batchB_images = np.empty((BATCH_SIZE, 256, 256, 3), dtype=np.float32)
        # enumerate batch and run graph
        for i, (a, b) in enumerate(zip(batchA_paths, batchB_paths)):
            a_img = misc.imread(a)
            b_img = misc.imread(b)
            # Data augmentation here - each has 50% chance
            if AUGMENT:
                a_img, b_img = augment(a_img, b_img)
            batchA_images[i, ...] = preprocess(a_img)
            batchB_images[i, ...] = preprocess(b_img)
        # train discriminator
        sess.run(D_train_op, feed_dict={image_u: batchA_images, image_r: batchB_images})
    # also get new batch for G
    idx = np.random.choice(np.arange(num_train), BATCH_SIZE, replace=False)
    batchA_paths = trainA_paths[idx]
    batchB_paths = trainB_paths[idx]
    batchA_images = np.empty((BATCH_SIZE, 256, 256, 3), dtype=np.float32)
    batchB_images = np.empty((BATCH_SIZE, 256, 256, 3), dtype=np.float32)
    for i, (a, b) in enumerate(zip(batchA_paths, batchB_paths)):
        a_img = misc.imread(a)
# w2v(log, 'uid', 'good_id', flag, 64) # w2v(log, 'uid', 'advertiser', flag, 64) # w2v(log, 'uid', 'aid', flag, 64) # # # Deepwalk # deepwalk(log, 'uid', 'aid', flag, 64) # deepwalk(log, 'uid', 'good_id', flag, 64) # # del train_df # del test_df # del log # gc.collect() # Word2vec print('preprocess train_log') train_log = preprocess(log_path='train_log_time_click_time_sequence.pkl') print('preprocess test_log') test_log = preprocess(is_train=False, log_path='test_log_time_click_time_sequence.pkl') log = pd.concat([train_log, test_log]) log.reset_index(drop=True, inplace=True) flag = 'test' # print('preprocess train_log') # train_log = preprocess(log_path='train_log.pkl') # log = train_log # flag = 'val' w2v(log, 'user_id', 'creative_id', flag, 128, window=10) w2v(log, 'user_id', 'ad_id', flag, 128, window=10) w2v(log, 'user_id', 'product_id', flag, 128, window=10)
# NOTE(review): CycleGAN test-script chunk; `json_file`, `model_h5`,
# `test_paths`, `samples_dir`, `read_and_resize`, `preprocess`,
# `deprocess` and `misc` are defined outside this view.
loaded_model_json = json_file.read()
times = []
s = time.time()
cycle_gan_generator = model_from_json(loaded_model_json)
# load weights
cycle_gan_generator.load_weights(model_h5)
tot = time.time() - s  # model-loading time recorded as the first entry
times.append(tot)
print("\nLoaded data and model")
# testing loop
for img_path in test_paths:
    # prepare data
    img_name = ntpath.basename(img_path).split('.')[0]
    im = read_and_resize(img_path, (256, 256))
    im = preprocess(im)
    im = np.expand_dims(im, axis=0)  # (1,256,256,3)
    # generate enhanced image
    s = time.time()
    gen = cycle_gan_generator.predict(im)
    gen = deprocess(gen)  # Rescale to 0-1
    tot = time.time() - s
    times.append(tot)
    # save samples
    misc.imsave(samples_dir + img_name + '_real.png', im[0])
    misc.imsave(samples_dir + img_name + '_gen.png', gen[0])
# some statistics
num_test = len(test_paths)
if (num_test == 0):
    print("\nFound no images for test")
# Hyper-parameters for the sentiment-classification script.
epochs = 10
batch = 256
lr = 0.001
cuda = True
model = "cnn"  # 'cnn' or 'rnn'

# Load vocabulary and make dictionary
vocabs = load_vocab('data/imdb/imdb.vocab')
w2i = {w: i for i, w in enumerate(vocabs)}  # word -> index
i2w = {i: w for i, w in enumerate(vocabs)}  # index -> word
vocab_size = len(vocabs)

# Load Data
train_x, train_y = load_data('data/', train=True)
train_x, train_y = preprocess(train_x, train_y, w2i, maxlen)

# Build Model & Loss & Optimizer
# NOTE(review): `model` holds the config string here and is then rebound
# to the network instance — a separate name would be clearer.
model = RNN(embedding, rnn_hidden, num_layers, bi, output_dim, vocab_size) \
    if model == 'rnn' else CNN(filters, num_filters, maxlen, vocab_size, embedding, output_dim)

# Loss function & Optimizer
criterion = nn.BCELoss()
optim = torch.optim.Adam(model.parameters(), lr)
if cuda:
    # Move the model and the full training tensors onto the GPU.
    model.cuda()
    train_x = train_x.cuda()
    train_y = train_y.cuda()

# Training procedure
# load model with open(model_json, "r") as json_file: loaded_model_json = json_file.read() funie_gan_generator = model_from_json(loaded_model_json) # load weights into new model funie_gan_generator.load_weights(model_h5) print("\nLoaded data and model") # testing loop times = [] s = time.time() for img_path in test_paths: # prepare data inp_img = read_and_resize(img_path, (256, 256)) im = preprocess(inp_img) im = np.expand_dims(im, axis=0) # (1,256,256,3) # generate enhanced image s = time.time() gen = funie_gan_generator.predict(im) gen_img = deprocess(gen)[0] tot = time.time() - s times.append(tot) # save output images img_name = ntpath.basename(img_path) out_img = np.hstack((inp_img, gen_img)).astype('uint8') Image.fromarray(out_img).save(join(samples_dir, img_name)) # some statistics num_test = len(test_paths) if (num_test == 0):
# NOTE(review): TF test-script chunk; `sess`, `global_step`,
# `test_paths`, `gen_image`, `image_u`, `read_and_resize`, `preprocess`
# and `misc` come from outside this view, and the trailing `if` body
# continues in the next chunk.
times = []
s = time.time()  # NOTE(review): overwritten inside the loop; unused here
# keep the samples
samples_dir = "../data/output/"
if not os.path.exists(samples_dir):
    os.makedirs(samples_dir)
# testing loop
step = int(sess.run(global_step))
for img_path in tqdm(test_paths):
    # prepare data
    img_name = ntpath.basename(img_path)
    img_name = img_name.split('.')[0]
    batch_images = np.empty((1, 256, 256, 3), dtype=np.float32)
    a_img = read_and_resize(img_path, (256, 256))
    a_img = preprocess(a_img)
    batch_images[0, ...] = a_img
    # generate enhanced image
    s = time.time()
    gen_images = sess.run(gen_image, feed_dict={image_u: batch_images})
    tot = time.time() - s
    times.append(tot)
    # save sample images
    gen_images = np.asarray(gen_images)
    for gen, real in zip(gen_images, batch_images):
        misc.imsave(samples_dir + img_name + '_real.png', real)
        misc.imsave(samples_dir + img_name + '_gen.png', gen)
# some statistics
num_test = len(test_paths)
if (num_test == 0):
def query(sess, input_node, output_node, feature_code, im_paths, gallery_features, query_labels, gallery_labels):
    """
    Retrieve each query image against the gallery and report top-1/top-5 mAP.

    :param sess: session running the model.
    :param input_node: input placeholder.
    :param output_node: output tensor (feature map).
    :param feature_code: SCDA encoding variant (1-4); must match the one
        used when the gallery was built.
    :param im_paths: list of query image paths.
    :param gallery_features: array returned by build_gallery (or loaded from files).
    :param query_labels: ground-truth labels used to check hits.
    :param gallery_labels: ground-truth labels used to check hits.
    :return: None (results are printed).
    """
    query_num = len(im_paths)
    query_labels = np.array(query_labels)
    gallery_labels = np.array(gallery_labels)
    top_1 = 0.0
    top_5 = 0.0
    print("Start query images...")
    for i, query_im_path in enumerate(im_paths):
        print('---------')
        print('{}/{}'.format(i, query_num))
        # get feature map
        batch_image = data_utils.preprocess(query_im_path, feature_code)
        batch_embedding = sess.run(output_node, feed_dict={input_node: batch_image})
        # SCDA encode (variant selected by feature_code)
        if feature_code == 1:
            query_feature, _ = scda_utils.scda(batch_embedding)
        elif feature_code == 2:
            query_feature = scda_utils.scda_flip(batch_embedding)
        elif feature_code == 3:
            query_feature = scda_utils.scda_plus(batch_embedding)
        else:
            query_feature = scda_utils.scda_flip_plus(batch_embedding)
        query_feature /= np.linalg.norm(query_feature, keepdims=True)
        # Cosine similarity against the whole gallery, rescaled from
        # [-1, 1] to [0, 1], then ranked descending (hence the minus).
        cos_sim = np.dot(query_feature, gallery_features.T)
        cos_sim = 0.5 + 0.5 * cos_sim
        sorted_indices = np.argsort(-cos_sim)
        # top-1 / top-5 retrieval statistics
        query_label = query_labels[i]
        k_gallery_label = gallery_labels[sorted_indices[:5]]
        # top-1 AP
        if query_label == k_gallery_label[0]:
            top_1 += 1
        # top-5 AP
        correct = 0
        ap = 0
        for j in range(5):
            if query_label == k_gallery_label[j]:
                correct += 1
                ap += (correct / (j + 1))
        # Bug fix: the original used `correct is not 0`, an identity
        # comparison with an int literal (SyntaxWarning on CPython >= 3.8
        # and implementation-dependent); value comparison is intended.
        ap = (ap / correct) if correct != 0 else 0
        top_5 = top_5 + ap
        print("top1-AP:%f | top5-AP: %f" % (top_1, ap))
    # overall mAP
    print('top1-mAP:', round(top_1 / query_num, 5))
    print('top5-mAP:', round(top_5 / query_num, 5))
# Hyper-parameters for the seq2seq translation script.
batch = 128
lr = 0.001
cuda = torch.cuda.is_available()
# - Attention visualization
show_attn = False
show_ex_num = 123

# Load Data and Build dictionaries
src_train_sent, tar_train_sent = load_data('data/', train=True, small=True)
src_dict, src_cand = load_vocab(src_train_sent)
tar_dict, tar_cand = load_vocab(tar_train_sent)
src_vocab_size = len(src_dict)
tar_vocab_size = len(tar_dict)
src_train, tar_train = preprocess(src_train_sent, tar_train_sent, src_dict, tar_dict, maxlen)

# Build Seq2Seq Model & Loss & Optimizer
model = Seq2seq(embedding_dim, rnn_hidden, num_layers, src_vocab_size, tar_vocab_size, bi, attention, attn_type, attn_dim)
# NOTE(review): ignore_index=3 presumably marks the padding token id —
# verify against the vocabulary built by load_vocab.
criterion = nn.NLLLoss(ignore_index=3)
optim = torch.optim.Adam(model.parameters(), lr)
if cuda:
    # Move the model and the full training tensors onto the GPU.
    model.cuda()
    src_train = src_train.cuda()
    tar_train = tar_train.cuda()

# Training
total_batch = np.ceil(len(src_train) / batch)
def create_transform(self):
    """Return the preprocessing transform for the current mode.

    Training mode ('Train') gets the augmenting pipeline; any other
    mode gets the plain resize-only preprocessing.
    """
    builder = aug_preprocess if self._train_mode == 'Train' else preprocess
    return builder(resize=self._opt.image_size)
# NOTE(review): SESR test-script chunk; `json_file`, `model_h5`,
# `test_paths`, `lr_res`, `lr_shape`, `hr_shape`, `lr_h`, `lr_w`,
# `preprocess`, `deprocess_uint8` and `deprocess_mask` are defined
# outside this view.
loaded_model_json = json_file.read()
generator = model_from_json(loaded_model_json)
generator.load_weights(model_h5)
print("\nLoaded data and model")
## create dir for output test data
samples_dir = join("data/output/", "keras_out")
if not exists(samples_dir):
    os.makedirs(samples_dir)
# testing loop
times = []
for img_path in test_paths:
    # prepare data
    img_name = basename(img_path).split('.')[0]
    img_lrd = np.array(Image.open(img_path).resize(lr_res))
    im = np.expand_dims(preprocess(img_lrd), axis=0)
    # get output
    s = time.time()
    gen_op = generator.predict(im)
    gen_lr, gen_hr, gen_mask = gen_op[0], gen_op[1], gen_op[2]
    tot = time.time() - s
    times.append(tot)
    # process raw outputs
    gen_lr = deprocess_uint8(gen_lr).reshape(lr_shape)
    gen_hr = deprocess_uint8(gen_hr).reshape(hr_shape)
    gen_mask = deprocess_mask(gen_mask).reshape(lr_h, lr_w)
    # save generated images (input, enhanced, saliency mask, SR output)
    Image.fromarray(img_lrd).save(join(samples_dir, img_name + '.png'))
    Image.fromarray(gen_lr).save(join(samples_dir, img_name + '_En.png'))
    Image.fromarray(gen_mask).save(join(samples_dir, img_name + '_Sal.png'))
    Image.fromarray(gen_hr).save(join(samples_dir, img_name + '_SESR.png'))