Example #1
    def detect_serialized_datasets(self):
        """
        Finding raw data pickles.
        If none found, proceed to creating pickles out of raw data.
        calls - 
        1. prepare.get_raw_input_data
        2. prepare.get_raw_target_data
        3. prepare.prepare_dataset
        """
        prepared_data_dir = str(utils.prepared_data_folder / self.dir_str / self.period)
        os.makedirs(prepared_data_dir, exist_ok=True)
        self.prepared_data_dir = prepared_data_dir
        print(f'Looking for pickles in {self.prepared_data_dir}')

        if len(utils.find('*serialized.pkl', self.prepared_data_dir)) == 2:
            print('This domain-period combination has been serialized before, loading objects...')
            for pkl in utils.find('*.pkl', self.prepared_data_dir):
                if "input_ds" in pkl: self.input_ds_serialized_path = pkl
                elif "rf_ds" in pkl: self.rf_ds_serialized_path = pkl
        else: 
            print('Proceeding to load & serialize raw data. ')
            self.raw_input_dir = prepare.get_raw_input_data(self)
            self.raw_rf_dir = prepare.get_raw_target_data(self)
            print(f'Raw input datasets taken from @: \n{self.raw_input_dir}')
            print(f'Raw rainfall datasets taken from @: \n{self.raw_rf_dir}')
            self.input_ds_serialized_path, self.rf_ds_serialized_path = prepare.prepare_dataset(self, self.prepared_data_dir)
        print(f'Serialized raw input datasets @: \n{self.input_ds_serialized_path}')
        print(f'Serialized raw RF datasets @: \n{self.rf_ds_serialized_path}')
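
The `utils.find` helper used above is not shown in this snippet; below is a minimal sketch of the recursive glob-style matcher it could correspond to (the name and signature are assumptions, not the project's actual implementation):

# Hypothetical stand-in for utils.find: recursive filename-pattern search.
import fnmatch
import os

def find(pattern, root):
    """Return all file paths under `root` whose basename matches `pattern`."""
    matches = []
    for dirpath, _, filenames in os.walk(root):
        for name in fnmatch.filter(filenames, pattern):
            matches.append(os.path.join(dirpath, name))
    return matches
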
Example #2
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    if not FLAGS.tflite_model:
        raise ValueError('You must supply the frozen pb with --tflite_model')
    if FLAGS.inference_type != 'float' and FLAGS.inference_type != 'uint8':
        raise ValueError('--inference_type must be one of float or uint8')

    tf.logging.set_verbosity(tf.logging.INFO)
    tfrecords = prepare_tfrecords(FLAGS.dataset_name, FLAGS.dataset_dir,
                                  FLAGS.dataset_split_name)

    if FLAGS.max_num_batches:
        num_batches = FLAGS.max_num_batches
    else:
        num_records = sum(
            [len(list(tf.python_io.tf_record_iterator(r))) for r in tfrecords])
        num_batches = int(math.ceil(num_records / float(FLAGS.batch_size)))

    filenames = tf.placeholder(tf.string, shape=[None])
    dataset = prepare_dataset(filenames,
                              FLAGS.dataset_name,
                              FLAGS.input_size,
                              batch_size=FLAGS.batch_size,
                              inference_type=FLAGS.inference_type)
    iterator = dataset.make_initializable_iterator()
    next_batch = iterator.get_next()

    tf.logging.info('Prepare run_tflite')
    eval_dir = os.path.dirname(FLAGS.tflite_model)
    eval_dir = os.path.join(eval_dir, 'eval_tflite')
    if not os.path.exists(eval_dir):
        os.makedirs(eval_dir)
    cmds = prepare_run_tflite_commands(eval_dir, FLAGS.tflite_model,
                                       FLAGS.inference_type)

    tf.logging.info('Prepare metrics')
    lbls, preds, accuracy, acc_update_op = prepare_metrics(
        FLAGS.dataset_name, inference_type=FLAGS.inference_type)

    # Initialize `iterator` with dataset.
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(iterator.initializer, feed_dict={filenames: tfrecords})

        for step in range(num_batches):
            if (step % 1000) == 0:
                print('{}/{}'.format(step, num_batches))
                # print(' '.join(cmds))
                print('  Accuracy: [{:.4f}]'.format(sess.run(accuracy)))
            images, labels = sess.run(next_batch)

            np.save(os.path.join(eval_dir, 'batch_xs.npy'), images)
            subprocess.check_output(cmds)
            ys = np.load(os.path.join(eval_dir, 'output_ys.npy'))
            sess.run(acc_update_op, feed_dict={lbls: labels, preds: ys})

        print('Accuracy: [{:.4f}]'.format(sess.run(accuracy)))
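
`prepare_metrics` is defined elsewhere in this repository; a minimal sketch of the placeholder-plus-streaming-accuracy wiring it appears to provide, assuming the TF 1.x `tf.metrics.accuracy` API (this is not the repository's actual code):

# Hypothetical sketch of prepare_metrics: label/prediction placeholders plus
# a streaming accuracy metric (TF 1.x style).
import tensorflow as tf

def prepare_metrics(dataset_name, inference_type='float'):
    lbls = tf.placeholder(tf.int32, shape=[None])            # ground-truth labels
    preds = tf.placeholder(tf.float32, shape=[None, None])   # per-class scores
    top1 = tf.cast(tf.argmax(preds, axis=1), tf.int32)       # predicted class ids
    accuracy, acc_update_op = tf.metrics.accuracy(labels=lbls, predictions=top1)
    return lbls, preds, accuracy, acc_update_op
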
Example #3
import prepare as pp
import feature_construction as fc
import modeling as md

# start the timer
from time import time

start = time()

pp.prepare_dataset()
fc.construct_feature()
md.one_hot()
md.tuning_hyper_parameters_sim()
md.predict_test_ol()

print('\nThe total time : {0:.0f} s'.format(time() - start))
Example #4
    "url": "https://www.dropbox.com/s/m38haw5rhz9wdm2/train_clean.tgz",
    "source": "train_clean.en",
    "target": "train_clean.zh",
    "data_source": "train.en",
    "data_target": "train.zh",
}

VALID = {
    "url": "https://www.dropbox.com/s/ft2evgnh8taeonf/valid_clean.tgz",
    "source": "valid_clean.en",
    "target": "valid_clean.zh",
    "data_source": "valid.en",
    "data_target": "valid.zh",
}

SAMPLE = {
    "url": "https://www.dropbox.com/s/11i3ccsizgq8lgt/sample_train.tgz",
    "source": "sample_train.en",
    "target": "sample_train.zh",
    "data_source": "train.en",
    "data_target": "train.zh",
}

DATA_DIR = "data/challenger_nmt/"
TMP_DIR = "tmp/challenger_nmt/"

if __name__ == '__main__':
    for ds in [SAMPLE, VALID]:
        # dataset is already tokenized
        prepare.prepare_dataset(DATA_DIR, TMP_DIR, ds, tokenize=False)
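
The `prepare.prepare_dataset` call comes from a separate module that is not shown; a rough sketch of the download-and-extract step such a spec-driven helper might perform, inferred only from the dict fields above (the function name and behaviour here are assumptions):

# Hypothetical sketch: fetch the archive named in a dataset spec and copy the
# extracted source/target files into data_dir under the data_* names.
import os
import shutil
import tarfile
import urllib.request

def prepare_dataset_sketch(data_dir, tmp_dir, spec):
    os.makedirs(data_dir, exist_ok=True)
    os.makedirs(tmp_dir, exist_ok=True)
    archive = os.path.join(tmp_dir, os.path.basename(spec["url"]))
    if not os.path.exists(archive):
        urllib.request.urlretrieve(spec["url"], archive)   # download once
    with tarfile.open(archive) as tar:
        tar.extractall(tmp_dir)                            # unpack next to it
    shutil.copy(os.path.join(tmp_dir, spec["source"]),
                os.path.join(data_dir, spec["data_source"]))
    shutil.copy(os.path.join(tmp_dir, spec["target"]),
                os.path.join(data_dir, spec["data_target"]))
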
Example #5
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    if not FLAGS.frozen_pb:
        raise ValueError('You must supply the frozen pb with --frozen_pb')
    if not FLAGS.output_node_name:
        raise ValueError(
            'You must supply the output node name with --output_node_name')

    tf.logging.set_verbosity(tf.logging.INFO)
    tfrecords = prepare_tfrecords(FLAGS.dataset_name, FLAGS.dataset_dir,
                                  FLAGS.dataset_split_name)

    if FLAGS.max_num_batches:
        num_batches = FLAGS.max_num_batches
    else:
        num_records = sum(
            [len(list(tf.python_io.tf_record_iterator(r))) for r in tfrecords])
        num_batches = int(math.ceil(num_records / float(FLAGS.batch_size)))

    #  for example in tf.python_io.tf_record_iterator(tfrecords[0]):
    #      result = tf.train.Example.FromString(example)
    #      print(result)
    #      break

    tf.logging.info('Prepare Dataset from tfrecord[0] {}'.format(tfrecords[0]))
    filenames = tf.placeholder(tf.string, shape=[None])
    dataset = prepare_dataset(filenames,
                              FLAGS.dataset_name,
                              FLAGS.input_size,
                              batch_size=FLAGS.batch_size)
    iterator = dataset.make_initializable_iterator()
    next_batch = iterator.get_next()

    tf.logging.info('Load GraphDef from frozen_pb {}'.format(FLAGS.frozen_pb))
    graph_def = load_graph_def(FLAGS.frozen_pb)

    tf.logging.info('Prepare metrics')
    lbls, preds, accuracy, acc_update_op = prepare_metrics(FLAGS.dataset_name)

    if FLAGS.summary_dir:
        tf.logging.info('Prepare summary writer')
        summary_writer = tf.summary.FileWriter(FLAGS.summary_dir)
        summaries = tf.summary.merge_all()

    # Initialize `iterator` with training data.
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(iterator.initializer, feed_dict={filenames: tfrecords})

        tf.import_graph_def(graph_def, name='')
        graph = sess.graph

        # get x and y
        x = graph.get_tensor_by_name('{}:0'.format(FLAGS.input_node_name))
        y = graph.get_tensor_by_name('{}:0'.format(FLAGS.output_node_name))

        for step in range(num_batches):
            images, labels = sess.run(next_batch)
            ys = sess.run(y, feed_dict={x: images})
            sess.run(acc_update_op, feed_dict={lbls: labels, preds: ys})
            if FLAGS.summary_dir:
                summary = sess.run(summaries)
                summary_writer.add_summary(summary, step)

        print('Accuracy: [{:.4f}]'.format(sess.run(accuracy)))
        # import ipdb
        # ipdb.set_trace()
        if FLAGS.summary_dir:
            summary_writer.add_graph(sess.graph)
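
`load_graph_def` is another helper defined outside this snippet; a minimal sketch of how a frozen .pb is typically read into a GraphDef in TF 1.x (assumed implementation, not the repository's code):

# Hypothetical sketch of load_graph_def: parse a frozen .pb into a GraphDef.
import tensorflow as tf

def load_graph_def(frozen_pb_path):
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_pb_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    return graph_def
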
Example #6
interval=args.interval
Ntrain=args.Ntrain
iterations=args.iterations

image_path="/coco/"
image_list=os.listdir(image_path)

outdir="./output_pretrain"
if not os.path.exists(outdir):
    os.mkdir(outdir)

test_box=[]
for i in range(testsize):
    rnd = np.random.randint(Ntrain + 1, Ntrain + 100)
    image_name = image_path + image_list[rnd]
    _, sr = prepare_dataset(image_name)
    test_box.append(sr)

x_test=chainer.as_variable(xp.array(test_box).astype(xp.float32))

generator=Generator()
generator.to_gpu()
gen_opt=set_optimizer(generator)

for epoch in range(epochs):
    sum_gen_loss=0
    sum_dis_loss=0
    for batch in range(0,iterations,batchsize):
        hr_box=[]
        sr_box=[]
        for index in range(batchsize):
Example #7
y_path="/twin/"
x_tag_path="/medium_mask/"
y_tag_path="/twin_mask/"
x_list=os.listdir(x_tag_path)
y_list=os.listdir(y_tag_path)
Nx = len(x_list) - 50
Ny = len(y_list)

test_box=[]
binary_box=[]
for _ in range(testsize):
    rnd=np.random.randint(Nx,Nx+50)
    filename=x_tag_path+x_list[rnd]
    binary=prepare_mask(filename,size,cluster)
    filename=x_path+x_list[rnd]
    image=prepare_dataset(filename,size,cluster)
    test_box.append(image)
    binary_box.append(binary)

test_img=chainer.as_variable(xp.array(test_box).astype(xp.float32))
test_mask=chainer.as_variable(xp.array(binary_box).astype(xp.float32))

outdir="./output"
if not os.path.exists(outdir):
    os.mkdir(outdir)

generator_xy = Generator()
generator_xy.to_gpu()
gen_xy_opt = set_optimizer(generator_xy)

generator_yx = Generator()
Example #8
VALID_ENZH = {
    "url": "http://data.statmt.org/wmt18/translation-task/dev.tgz",
    "source": "dev/newsdev2017-zhen-ref.en.sgm",
    "target": "dev/newsdev2017-zhen-src.zh.sgm",
    "data_source": "valid.en",
    "data_target": "valid.zh",
}

TEST_ZHEN = {
    "url": "http://data.statmt.org/wmt18/translation-task/test.tgz",
    "source": "test/newstest2018-zhen-ref.en.sgm",
    "target": "test/newstest2018-zhen-src.zh.sgm",
    "data_source": "test.en",
    "data_target": "test.zh",
}

DATA_DIR = "data/wmt18_en_zh/"
TMP_DIR = "tmp/wmt18_en_zh/"

if __name__ == '__main__':
    sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
                                  encoding='utf-8',
                                  write_through=True,
                                  line_buffering=True)

    for ds in [TRAIN_ENZH, VALID_ENZH, TEST_ZHEN]:
        prepare.prepare_dataset(DATA_DIR, TMP_DIR, ds)
Example #9
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    if not FLAGS.frozen_pb:
        raise ValueError('You must supply the frozen pb with --frozen_pb')
    if not FLAGS.output_node_name:
        raise ValueError(
            'You must supply the output node name with --output_node_name')
    if not FLAGS.output_dir:
        raise ValueError(
            'You must supply the output directory with --output_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    tfrecords = prepare_tfrecords(FLAGS.dataset_name, FLAGS.dataset_dir,
                                  FLAGS.dataset_split_name)

    if FLAGS.max_num_batches:
        num_batches = FLAGS.max_num_batches
    else:
        num_records = sum(
            [len(list(tf.python_io.tf_record_iterator(r))) for r in tfrecords])
        num_batches = int(math.ceil(num_records / float(FLAGS.batch_size)))

    tf.logging.info('Load GraphDef from frozen_pb {}'.format(FLAGS.frozen_pb))
    graph_def = load_graph_def(FLAGS.frozen_pb)

    tf.logging.info('Quantize Graph')
    with tf.Session() as sess:
        tf.import_graph_def(graph_def, name='')
        quantized_graph = qg.create_training_graph(sess.graph)
        quantized_inf_graph = qg.create_eval_graph(sess.graph)

    # Initialize `iterator` with training data.
    with tf.Session(graph=quantized_graph) as sess:
        tf.logging.info('Prepare dataset')
        with tf.name_scope("dataset"):
            filenames = tf.placeholder(tf.string, shape=[None])
            dataset = prepare_dataset(filenames,
                                      FLAGS.dataset_name,
                                      FLAGS.input_size,
                                      batch_size=FLAGS.batch_size)
            iterator = dataset.make_initializable_iterator()
            next_batch = iterator.get_next()

        tf.logging.info('Prepare metrics')
        lbls, preds, accuracy, acc_update_op = prepare_metrics(
            FLAGS.dataset_name)

        tf.logging.info('Prepare Saver')
        saver = tf.train.Saver()

        if FLAGS.summary_dir:
            tf.logging.info('Prepare summary writer')
            summary_writer = tf.summary.FileWriter(FLAGS.summary_dir)

        # initialize
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(iterator.initializer, feed_dict={filenames: tfrecords})

        graph = sess.graph

        # get x and y
        x = graph.get_tensor_by_name('{}:0'.format(FLAGS.input_node_name))
        y = graph.get_tensor_by_name('{}:0'.format(FLAGS.output_node_name))

        # summarize all min/max quantization variables
        # print(graph.get_collection('variables')[3].eval())
        for var in graph.get_collection('variables'):
            tf.summary.scalar(var.name, var)
        summaries = tf.summary.merge_all()

        for step in range(num_batches):
            images, labels = sess.run(next_batch)
            ys = sess.run(y, feed_dict={x: images})
            sess.run(acc_update_op, feed_dict={lbls: labels, preds: ys})
            summary = sess.run(summaries)
            if FLAGS.summary_dir:
                summary_writer.add_summary(summary, step)

        print('Accuracy: [{:.4f}]'.format(sess.run(accuracy)))
        if FLAGS.summary_dir:
            summary_writer.add_graph(graph)

        # save graph and ckpts
        saver.save(sess, os.path.join(FLAGS.output_dir, "model.ckpt"))
        # tf.train.write_graph(graph, FLAGS.output_dir, 'quantor.pb', as_text=False)
        tf.train.write_graph(quantized_inf_graph,
                             FLAGS.output_dir,
                             'quantor.pb',
                             as_text=False)
Example #10
predictor_y = UNet()
predictor_y.to_gpu()
pre_opt_y = set_optimizer(predictor_y)

for epoch in range(epochs):
    sum_dis_loss = 0
    sum_gen_loss = 0
    for batch in range(0, iterations, batchsize):
        x_box = []
        y_box = []
        rnd1 = np.random.randint(x_len - batchsize)
        rnd2 = np.random.randint(y_len - batchsize)
        for index in range(batchsize):
            image_name = x_path + str(rnd1 + index) + ".png"
            source = prepare_dataset(image_name)
            image_name = y_path + str(rnd2 + index) + ".png"
            target = prepare_dataset(image_name)
            x_box.append(source)
            y_box.append(target)

        x = chainer.as_variable(xp.array(x_box).astype(xp.float32))
        y = chainer.as_variable(xp.array(y_box).astype(xp.float32))

        for index in range(frames, batchsize):
            x_series = F.concat([
                x[index - 2].reshape(1, 3, size, size),
                x[index - 1].reshape(1, 3, size, size)
            ])
            x_serial = x[index - 2:index]
            y_series = F.concat([
Example #11
discriminator.to_gpu()
dis_opt = set_optimizer(discriminator)

for epoch in range(epochs):
    sum_gen_loss = 0
    sum_dis_loss = 0
    for batch in range(0, iterations, framesize):
        input_box = []
        target_box = []
        opt_box = []
        rnd = np.random.randint(image_len)
        dir_path = image_path + image_list[rnd]
        ta = np.random.choice(["lefteye", "righteye"])
        for index in range(framesize):
            filename1 = dir_path + "/" + ta + "_" + str(0) + ".png"
            inp = prepare_dataset(filename1)
            input_box.append(inp)
            filename2 = dir_path + "/" + ta + "_" + str(index) + ".png"
            img = prepare_dataset(filename2)
            target_box.append(img)
            ref = optical_flow(filename1, filename2)
            opt_box.append(ref)

        x = chainer.as_variable(xp.array(input_box).astype(xp.float32))
        t = chainer.as_variable(xp.array(target_box).astype(xp.float32))
        opt = chainer.as_variable(xp.array(opt_box).astype(xp.float32))

        #y=encoder(F.concat([x,opt]))

        #_, channels, height, width=y.shape
        #y=y.reshape(1,framesize,channels,height,width).transpose(0,2,1,3,4)
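
The `optical_flow` helper used in this example is not shown; a minimal sketch of what it could look like using OpenCV's dense Farneback flow, returned channels-first to match the Chainer inputs above (the implementation here is an assumption, not the original author's code):

# Hypothetical sketch of optical_flow: dense Farneback flow between two frames.
import cv2
import numpy as np

def optical_flow(filename1, filename2):
    prev = cv2.cvtColor(cv2.imread(filename1), cv2.COLOR_BGR2GRAY)
    nxt = cv2.cvtColor(cv2.imread(filename2), cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev, nxt, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    return flow.transpose(2, 0, 1).astype(np.float32)  # shape (2, H, W)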