def __obtain_tokens(self, filename):
    """
    Open the selected file and return a list of token samples for it
    using the tokenize library.

    :param filename: path to the file to open, relative to ``self.root_dir``.
    :return: list containing one list of preprocessed tokens, or an
        empty list if the file could not be read.
    """
    sample = []
    try:
        # Use a context manager so the file handle is always closed;
        # the original opened the file and never closed it (leak).
        with open(os.path.join(self.root_dir, filename), 'r',
                  encoding='utf-8') as current_file:
            tokens = tokenize.generate_tokens(current_file.readline)
            # Don't process comments, block comments (triple-quoted
            # strings) or empty tokens; DEDENT tokens are kept even
            # though their string value is empty.
            processed_tokens = [
                du.preprocess(t_type, t_val)
                for t_type, t_val, _, _, _ in tokens
                if t_type != tokenize.COMMENT
                and not t_val.startswith("'''")
                and not t_val.startswith('"""')
                and (t_type == tokenize.DEDENT or t_val != "")
            ]
        if processed_tokens:
            sample.append(processed_tokens)
    except OSError:
        # Unreadable files are skipped deliberately (best-effort scan).
        pass
    return sample
def load_data():
    """
    Load the grenade training data for both maps and return features/labels.

    :return: tuple ``(X, y)`` as produced by ``preprocess``.
    """
    mirage_csv_file = "C:/CS-GO-Grenade-Classification/project/data/train-grenades-de_mirage.csv"
    inferno_csv_file = "C:/CS-GO-Grenade-Classification/project/data/train-grenades-de_inferno.csv"
    # Bug fix: the original read the mirage CSV into `inferno` and the
    # inferno CSV into `mirage`. Each frame now matches its file; the
    # concat order below preserves the original row order (mirage data
    # first, then inferno data).
    mirage = pd.read_csv(mirage_csv_file, index_col=0)
    inferno = pd.read_csv(inferno_csv_file, index_col=0)
    raw_data = pd.concat([mirage, inferno])
    X, y = preprocess(raw_data)
    return X, y
def build_gallery(sess, input_node, output_node, feature_code, image_paths, gallery_data_dir):
    """
    Encode every gallery image into a feature vector and save the result.

    :param sess: tf.Session used to run the model.
    :param input_node: input placeholder fed with a preprocessed image.
    :param output_node: output tensor producing the embedding.
    :param feature_code: selects the SCDA encoding variant (1-4).
    :param image_paths: list of paths to all gallery images.
    :param gallery_data_dir: directory where the stacked features are
        written as ``gallery_features.npy``.
    :return: np.ndarray of L2-normalised features, one entry per image.
    """
    print('Start building gallery...')
    assert os.path.isdir(gallery_data_dir), 'dir: {} cannot find'.format(gallery_data_dir)
    total = len(image_paths)
    features = []
    for idx, path in enumerate(image_paths):
        print('{}/{}'.format(idx + 1, total))
        image_batch = data_utils.preprocess(path, feature_code)
        embedding = sess.run(output_node, feed_dict={input_node: image_batch})
        # Pick the SCDA encoding variant requested by feature_code.
        if feature_code == 1:
            vec, _ = scda_utils.scda(embedding)
        elif feature_code == 2:
            vec = scda_utils.scda_flip(embedding)
        elif feature_code == 3:
            vec = scda_utils.scda_plus(embedding)
        else:
            vec = scda_utils.scda_flip_plus(embedding)
        # L2-normalise so retrieval can rank with plain dot products.
        vec /= np.linalg.norm(vec, keepdims=True)
        features.append(vec)
    # Persist the stacked feature matrix next to the gallery data.
    feature_array = np.array(features)
    np.save(os.path.join(gallery_data_dir, 'gallery_features.npy'), feature_array)
    print('Finish building gallery!')
    return feature_array
def test(test_dir, res_dir, model_h5):
    """
    Run SVAM-Net on every image in ``test_dir`` and save saliency outputs.

    :param test_dir: directory containing the input images.
    :param res_dir: output directory (created if missing).
    :param model_h5: path to the h5 weight file; asserted to exist.
    """
    ## create dir for output test data
    if not exists(res_dir):
        os.makedirs(res_dir)
    test_paths = sorted(glob(join(test_dir, "*.*")))
    print("{0} test images are loaded".format(len(test_paths)))
    ## load specific model
    assert os.path.exists(model_h5), "h5 model not found"
    model = SVAM_Net(res=im_shape)
    model.load_weights(model_h5)
    # testing loop (removed unused outer `s = time.time()`)
    times = []
    for img_path in test_paths:
        # prepare data
        img_name = ntpath.basename(img_path).split('.')[0]
        inp_img = np.array(Image.open(img_path).resize(im_res))
        im = np.expand_dims(preprocess(inp_img), axis=0)
        # generate saliency
        t0 = time.time()
        saml, sambu, samd, out = model.predict(im)
        times.append(time.time() - t0)
        _, out_bu, _, out_tdr = deprocess_gens(saml, sambu, samd, out, im_res)
        print("tested: {0}".format(img_path))
        Image.fromarray(inp_img).save(join(res_dir, img_name + ".jpg"))
        Image.fromarray(out_bu).save(join(res_dir, img_name + "_bu.png"))
        Image.fromarray(out_tdr).save(join(res_dir, img_name + "_tdr.png"))
    # some statistics
    num_test = len(test_paths)
    if num_test == 0:
        print("\nFound no images for test")
    else:
        print("\nTotal images: {0}".format(num_test))
        # Bug fix: with exactly one image, times[1:] is empty and the
        # original produced a nan mean and a bad division; only report
        # throughput when at least two frames were timed (the first
        # frame is excluded as warm-up/bootstrap).
        if len(times) > 1:
            Ttime, Mtime = np.sum(times[1:]), np.mean(times[1:])
            print("Time taken: {0} sec at {1} fps".format(Ttime, 1. / Mtime))
        # Fixed duplicated word in the original message ("in in").
        print("\nSaved generated images in {0}\n".format(res_dir))
def testing(nn_model, nb_photon, nb_epoch, lr, batch_size, GPU=False):
    """
    Train ``nn_model`` for ``nb_epoch`` epochs and track its metrics.

    :param nn_model: a PyTorch model.
    :param nb_photon: dataset selector forwarded to ``preprocess``.
    :param nb_epoch: number of training epochs.
    :param lr: learning rate for the Adam optimizer.
    :param batch_size: mini-batch size for the data loaders.
    :param GPU: boolean flag enabling CUDA features in train/get_accuracy.
    :return: (best validation precision, accuracy loader, training losses,
        validation losses, per-epoch validation precisions).
    """
    optimizer = optim.Adam(nn_model.parameters(), lr=lr, weight_decay=0.05)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    train_loader, validation_loader, accuracy_loader = preprocess(nb_photon, batch_size)

    best_precision = 0
    training_losses, validation_losses, accuracies = [], [], []

    for epoch in tqdm(range(1, nb_epoch + 1)):
        # One optimisation pass over the training set.
        nn_model = train(nn_model, train_loader, optimizer, GPU)

        # Metrics on the training set.
        train_precision, loss_training = get_accuracy(nn_model, train_loader, GPU)
        training_losses.append(loss_training)

        # Metrics on the validation set.
        precision, loss_validation = get_accuracy(nn_model, validation_loader, GPU)
        validation_losses.append(loss_validation)
        accuracies.append(precision)
        best_precision = max(best_precision, precision)

        # Let the scheduler react to validation-loss plateaus.
        scheduler.step(loss_validation)

    return best_precision, accuracy_loader, training_losses, validation_losses, accuracies
# NOTE(review): chunk of a test script; `generator`, `model_name`,
# `test_paths`, `preprocess`, `deprocess` and `misc` come from outside
# this view, and the trailing `else:` body continues in the next chunk.
generator.load_weights(model_h5)
print("\nLoaded data and model")
## create dir for output test data
samples_dir = os.path.join("data/output/8x/", model_name)
if not os.path.exists(samples_dir):
    os.makedirs(samples_dir)
# testing loop
times = []
s = time.time()  # NOTE(review): overwritten inside the loop; unused here
for img_path in test_paths:
    # prepare data
    img_name = ntpath.basename(img_path).split('.')[0]
    img_lr = misc.imread(img_path, mode='RGB').astype(np.float)
    img_lr = misc.imresize(img_lr, (60, 80))
    im = preprocess(img_lr)
    im = np.expand_dims(im, axis=0)
    # generate enhanced image
    s = time.time()
    gen = generator.predict(im)
    gen = deprocess(gen)  # Rescale to 0-1
    tot = time.time() - s
    times.append(tot)
    # save sample images
    misc.imsave(os.path.join(samples_dir, img_name + '_gen.png'), gen[0])
# some statistics
num_test = len(test_paths)
if (num_test == 0):
    print("\nFound no images for test")
else:
# NOTE(review): adversarial training chunk; `step`, `TOTAL_STEP`,
# `n_critic`, `sess`, the placeholders and the path arrays come from
# outside this view, and the chunk is cut off mid-statement at the end —
# the rest of the generator batch loop is in the next chunk.
while step < TOTAL_STEP:
    # pick random images every time for D
    for itr in range(n_critic):
        idx = np.random.choice(np.arange(num_train), BATCH_SIZE, replace=False)
        batchA_paths = trainA_paths[idx]
        batchB_paths = trainB_paths[idx]
        batchA_images = np.empty((BATCH_SIZE, 256, 256, 3), dtype=np.float32)
        batchB_images = np.empty((BATCH_SIZE, 256, 256, 3), dtype=np.float32)
        # enumerate batch and run graph
        for i, (a, b) in enumerate(zip(batchA_paths, batchB_paths)):
            a_img = misc.imread(a)
            b_img = misc.imread(b)
            # Data augmentation here - each has 50% chance
            if AUGMENT:
                a_img, b_img = augment(a_img, b_img)
            batchA_images[i, ...] = preprocess(a_img)
            batchB_images[i, ...] = preprocess(b_img)
        # train discriminator
        sess.run(D_train_op, feed_dict={image_u: batchA_images, image_r: batchB_images})
    # also get new batch for G
    idx = np.random.choice(np.arange(num_train), BATCH_SIZE, replace=False)
    batchA_paths = trainA_paths[idx]
    batchB_paths = trainB_paths[idx]
    batchA_images = np.empty((BATCH_SIZE, 256, 256, 3), dtype=np.float32)
    batchB_images = np.empty((BATCH_SIZE, 256, 256, 3), dtype=np.float32)
    for i, (a, b) in enumerate(zip(batchA_paths, batchB_paths)):
        a_img = misc.imread(a)
# w2v(log, 'uid', 'good_id', flag, 64) # w2v(log, 'uid', 'advertiser', flag, 64) # w2v(log, 'uid', 'aid', flag, 64) # # # Deepwalk # deepwalk(log, 'uid', 'aid', flag, 64) # deepwalk(log, 'uid', 'good_id', flag, 64) # # del train_df # del test_df # del log # gc.collect() # Word2vec print('preprocess train_log') train_log = preprocess(log_path='train_log_time_click_time_sequence.pkl') print('preprocess test_log') test_log = preprocess(is_train=False, log_path='test_log_time_click_time_sequence.pkl') log = pd.concat([train_log, test_log]) log.reset_index(drop=True, inplace=True) flag = 'test' # print('preprocess train_log') # train_log = preprocess(log_path='train_log.pkl') # log = train_log # flag = 'val' w2v(log, 'user_id', 'creative_id', flag, 128, window=10) w2v(log, 'user_id', 'ad_id', flag, 128, window=10) w2v(log, 'user_id', 'product_id', flag, 128, window=10)
# NOTE(review): CycleGAN test-script chunk; `json_file`, `model_h5`,
# `test_paths`, `samples_dir`, `read_and_resize`, `preprocess`,
# `deprocess` and `misc` are defined outside this view.
loaded_model_json = json_file.read()
times = []
s = time.time()
cycle_gan_generator = model_from_json(loaded_model_json)
# load weights
cycle_gan_generator.load_weights(model_h5)
tot = time.time() - s  # model-loading time recorded as the first entry
times.append(tot)
print("\nLoaded data and model")
# testing loop
for img_path in test_paths:
    # prepare data
    img_name = ntpath.basename(img_path).split('.')[0]
    im = read_and_resize(img_path, (256, 256))
    im = preprocess(im)
    im = np.expand_dims(im, axis=0)  # (1,256,256,3)
    # generate enhanced image
    s = time.time()
    gen = cycle_gan_generator.predict(im)
    gen = deprocess(gen)  # Rescale to 0-1
    tot = time.time() - s
    times.append(tot)
    # save samples
    misc.imsave(samples_dir + img_name + '_real.png', im[0])
    misc.imsave(samples_dir + img_name + '_gen.png', gen[0])
# some statistics
num_test = len(test_paths)
if (num_test == 0):
    print("\nFound no images for test")
# Hyper-parameters for the sentiment-classification script.
epochs = 10
batch = 256
lr = 0.001
cuda = True
model = "cnn"  # 'cnn' or 'rnn'

# Load vocabulary and make dictionary
vocabs = load_vocab('data/imdb/imdb.vocab')
w2i = {w: i for i, w in enumerate(vocabs)}  # word -> index
i2w = {i: w for i, w in enumerate(vocabs)}  # index -> word
vocab_size = len(vocabs)

# Load Data
train_x, train_y = load_data('data/', train=True)
train_x, train_y = preprocess(train_x, train_y, w2i, maxlen)

# Build Model & Loss & Optimizer
# NOTE(review): `model` holds the config string here and is then rebound
# to the network instance — a separate name would be clearer.
model = RNN(embedding, rnn_hidden, num_layers, bi, output_dim, vocab_size) \
    if model == 'rnn' else CNN(filters, num_filters, maxlen, vocab_size, embedding, output_dim)

# Loss function & Optimizer
criterion = nn.BCELoss()
optim = torch.optim.Adam(model.parameters(), lr)
if cuda:
    # Move the model and the full training tensors onto the GPU.
    model.cuda()
    train_x = train_x.cuda()
    train_y = train_y.cuda()

# Training procedure
# load model with open(model_json, "r") as json_file: loaded_model_json = json_file.read() funie_gan_generator = model_from_json(loaded_model_json) # load weights into new model funie_gan_generator.load_weights(model_h5) print("\nLoaded data and model") # testing loop times = [] s = time.time() for img_path in test_paths: # prepare data inp_img = read_and_resize(img_path, (256, 256)) im = preprocess(inp_img) im = np.expand_dims(im, axis=0) # (1,256,256,3) # generate enhanced image s = time.time() gen = funie_gan_generator.predict(im) gen_img = deprocess(gen)[0] tot = time.time() - s times.append(tot) # save output images img_name = ntpath.basename(img_path) out_img = np.hstack((inp_img, gen_img)).astype('uint8') Image.fromarray(out_img).save(join(samples_dir, img_name)) # some statistics num_test = len(test_paths) if (num_test == 0):
# NOTE(review): TF test-script chunk; `sess`, `global_step`,
# `test_paths`, `gen_image`, `image_u`, `read_and_resize`, `preprocess`
# and `misc` come from outside this view, and the trailing `if` body
# continues in the next chunk.
times = []
s = time.time()  # NOTE(review): overwritten inside the loop; unused here
# keep the samples
samples_dir = "../data/output/"
if not os.path.exists(samples_dir):
    os.makedirs(samples_dir)
# testing loop
step = int(sess.run(global_step))
for img_path in tqdm(test_paths):
    # prepare data
    img_name = ntpath.basename(img_path)
    img_name = img_name.split('.')[0]
    batch_images = np.empty((1, 256, 256, 3), dtype=np.float32)
    a_img = read_and_resize(img_path, (256, 256))
    a_img = preprocess(a_img)
    batch_images[0, ...] = a_img
    # generate enhanced image
    s = time.time()
    gen_images = sess.run(gen_image, feed_dict={image_u: batch_images})
    tot = time.time() - s
    times.append(tot)
    # save sample images
    gen_images = np.asarray(gen_images)
    for gen, real in zip(gen_images, batch_images):
        misc.imsave(samples_dir + img_name + '_real.png', real)
        misc.imsave(samples_dir + img_name + '_gen.png', gen)
# some statistics
num_test = len(test_paths)
if (num_test == 0):
def query(sess, input_node, output_node, feature_code, im_paths, gallery_features, query_labels, gallery_labels):
    """
    Retrieve each query image against the gallery and report top-1/top-5 mAP.

    :param sess: session running the model.
    :param input_node: input placeholder.
    :param output_node: output tensor (feature map).
    :param feature_code: SCDA encoding variant (1-4); must match the one
        used when the gallery was built.
    :param im_paths: list of query image paths.
    :param gallery_features: array returned by build_gallery (or loaded from files).
    :param query_labels: ground-truth labels used to check hits.
    :param gallery_labels: ground-truth labels used to check hits.
    :return: None (results are printed).
    """
    query_num = len(im_paths)
    query_labels = np.array(query_labels)
    gallery_labels = np.array(gallery_labels)
    top_1 = 0.0
    top_5 = 0.0
    print("Start query images...")
    for i, query_im_path in enumerate(im_paths):
        print('---------')
        print('{}/{}'.format(i, query_num))
        # get feature map
        batch_image = data_utils.preprocess(query_im_path, feature_code)
        batch_embedding = sess.run(output_node, feed_dict={input_node: batch_image})
        # SCDA encode (variant selected by feature_code)
        if feature_code == 1:
            query_feature, _ = scda_utils.scda(batch_embedding)
        elif feature_code == 2:
            query_feature = scda_utils.scda_flip(batch_embedding)
        elif feature_code == 3:
            query_feature = scda_utils.scda_plus(batch_embedding)
        else:
            query_feature = scda_utils.scda_flip_plus(batch_embedding)
        query_feature /= np.linalg.norm(query_feature, keepdims=True)
        # Cosine similarity against the whole gallery, rescaled from
        # [-1, 1] to [0, 1], then ranked descending (hence the minus).
        cos_sim = np.dot(query_feature, gallery_features.T)
        cos_sim = 0.5 + 0.5 * cos_sim
        sorted_indices = np.argsort(-cos_sim)
        # top-1 / top-5 retrieval statistics
        query_label = query_labels[i]
        k_gallery_label = gallery_labels[sorted_indices[:5]]
        # top-1 AP
        if query_label == k_gallery_label[0]:
            top_1 += 1
        # top-5 AP
        correct = 0
        ap = 0
        for j in range(5):
            if query_label == k_gallery_label[j]:
                correct += 1
                ap += (correct / (j + 1))
        # Bug fix: the original used `correct is not 0`, an identity
        # comparison with an int literal (SyntaxWarning on CPython >= 3.8
        # and implementation-dependent); value comparison is intended.
        ap = (ap / correct) if correct != 0 else 0
        top_5 = top_5 + ap
        print("top1-AP:%f | top5-AP: %f" % (top_1, ap))
    # overall mAP
    print('top1-mAP:', round(top_1 / query_num, 5))
    print('top5-mAP:', round(top_5 / query_num, 5))
# Hyper-parameters for the seq2seq translation script.
batch = 128
lr = 0.001
cuda = torch.cuda.is_available()
# - Attention visualization
show_attn = False
show_ex_num = 123

# Load Data and Build dictionaries
src_train_sent, tar_train_sent = load_data('data/', train=True, small=True)
src_dict, src_cand = load_vocab(src_train_sent)
tar_dict, tar_cand = load_vocab(tar_train_sent)
src_vocab_size = len(src_dict)
tar_vocab_size = len(tar_dict)
src_train, tar_train = preprocess(src_train_sent, tar_train_sent, src_dict, tar_dict, maxlen)

# Build Seq2Seq Model & Loss & Optimizer
model = Seq2seq(embedding_dim, rnn_hidden, num_layers, src_vocab_size, tar_vocab_size, bi, attention, attn_type, attn_dim)
# NOTE(review): ignore_index=3 presumably marks the padding token id —
# verify against the vocabulary built by load_vocab.
criterion = nn.NLLLoss(ignore_index=3)
optim = torch.optim.Adam(model.parameters(), lr)
if cuda:
    # Move the model and the full training tensors onto the GPU.
    model.cuda()
    src_train = src_train.cuda()
    tar_train = tar_train.cuda()

# Training
total_batch = np.ceil(len(src_train) / batch)
def create_transform(self):
    """Return the preprocessing transform for the current mode.

    Training mode ('Train') gets the augmenting pipeline; any other
    mode gets the plain resize-only preprocessing.
    """
    builder = aug_preprocess if self._train_mode == 'Train' else preprocess
    return builder(resize=self._opt.image_size)
# NOTE(review): SESR test-script chunk; `json_file`, `model_h5`,
# `test_paths`, `lr_res`, `lr_shape`, `hr_shape`, `lr_h`, `lr_w`,
# `preprocess`, `deprocess_uint8` and `deprocess_mask` are defined
# outside this view.
loaded_model_json = json_file.read()
generator = model_from_json(loaded_model_json)
generator.load_weights(model_h5)
print("\nLoaded data and model")
## create dir for output test data
samples_dir = join("data/output/", "keras_out")
if not exists(samples_dir):
    os.makedirs(samples_dir)
# testing loop
times = []
for img_path in test_paths:
    # prepare data
    img_name = basename(img_path).split('.')[0]
    img_lrd = np.array(Image.open(img_path).resize(lr_res))
    im = np.expand_dims(preprocess(img_lrd), axis=0)
    # get output
    s = time.time()
    gen_op = generator.predict(im)
    gen_lr, gen_hr, gen_mask = gen_op[0], gen_op[1], gen_op[2]
    tot = time.time() - s
    times.append(tot)
    # process raw outputs
    gen_lr = deprocess_uint8(gen_lr).reshape(lr_shape)
    gen_hr = deprocess_uint8(gen_hr).reshape(hr_shape)
    gen_mask = deprocess_mask(gen_mask).reshape(lr_h, lr_w)
    # save generated images (input, enhanced, saliency mask, SR output)
    Image.fromarray(img_lrd).save(join(samples_dir, img_name + '.png'))
    Image.fromarray(gen_lr).save(join(samples_dir, img_name + '_En.png'))
    Image.fromarray(gen_mask).save(join(samples_dir, img_name + '_Sal.png'))
    Image.fromarray(gen_hr).save(join(samples_dir, img_name + '_SESR.png'))