Example #1
    def _pipeline(self, config, inputs):
        if config.model == 'cbow':
            model_func = md.cbow_forward

        elif config.model == 'rnn':
            model_func = md.rnn_forward
        elif config.model == 'lstm':
            model_func = md.lstm_forward
        elif config.model == 'lstm_gru':
            model_func = md.lstm_fw_gru_bw
        elif config.model == 'gru_lstm':
            model_func = md.gru_fw_lstm_bw

        elif config.model == 'att':
            model_func = md.attention_forward
        elif config.model == 'lstm_att':
            model_func = md.lstm_attention_forward

        elif config.model == 'att2rnn':
            model_func = md.attention_to_rnn_forward

        else:
            raise NotImplementedError()
        self.variables, outputs = model_func(config, inputs)
        loss, grads = None, None
        if config.supervised:
            loss = md.get_loss(config, inputs, outputs)
            if config.is_train:
                grads = self.opt.compute_gradients(loss)
        return outputs, loss, grads
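Note: the if/elif ladder above only maps config.model strings to forward functions. A table-driven dispatch is an equivalent, more compact sketch, assuming the same hypothetical md module and attributes:

    # Sketch: dictionary dispatch equivalent to the if/elif chain above (class-body fragment).
    _MODEL_FUNCS = {
        'cbow': md.cbow_forward,
        'rnn': md.rnn_forward,
        'lstm': md.lstm_forward,
        'lstm_gru': md.lstm_fw_gru_bw,
        'gru_lstm': md.gru_fw_lstm_bw,
        'att': md.attention_forward,
        'lstm_att': md.lstm_attention_forward,
        'att2rnn': md.attention_to_rnn_forward,
    }

    def _pipeline(self, config, inputs):
        try:
            model_func = self._MODEL_FUNCS[config.model]
        except KeyError:
            raise NotImplementedError(config.model)
        self.variables, outputs = model_func(config, inputs)
        loss, grads = None, None
        if config.supervised:
            loss = md.get_loss(config, inputs, outputs)
            if config.is_train:
                grads = self.opt.compute_gradients(loss)
        return outputs, loss, grads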
Example #2
def train():
    img = tf.placeholder(shape=[config.batch_size, config.Config['min_dim'], config.Config['min_dim'], 3], dtype=tf.float32)
    #ig = AddCoords(x_dim=512,y_dim=512)(img)
    anchors_num = sum(
        [config.Config['feature_maps'][s] ** 2 * config.Config['aspect_num'][s] for s in range(5)])
    loc = tf.placeholder(shape=[config.batch_size, anchors_num, 4], dtype=tf.float32)
    conf = tf.placeholder(shape=[config.batch_size, anchors_num], dtype=tf.float32)
    pred_loc, pred_confs, vbs = retinanet.model(img,config)
    train_tensors = get_loss(conf, loc, pred_loc, pred_confs,config)
    gen = data_gen.get_batch_inception(batch_size=config.batch_size,image_size=config.Config['min_dim'],max_detect=50)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.exponential_decay(
        learning_rate=0.001,
        global_step=global_step,
        decay_steps=40000,
        decay_rate=0.7,
        staircase=True)

    tf.summary.scalar('lr', lr)
    sum_op = tf.summary.merge_all()

    optimizer = tf.train.MomentumOptimizer(learning_rate=lr,momentum=0.9)
    train_op = slim.learning.create_train_op(train_tensors, optimizer)
    vbs = []
    for s in slim.get_variables():
        print(s.name)
        if 'resnet_v2_50' in s.name and 'Momentum' not in s.name:
            print(s.name)
            vbs.append(s)

    saver = tf.train.Saver(vbs)

    def restore(sess):
        saver.restore(sess, config.check_dir)


    sv = tf.train.Supervisor(logdir=config.save_dir, summary_op=None, init_fn=restore)

    with sv.managed_session() as sess:
        for step in range(200000):
            print('       '+' '.join(['*']*(step%10)))
            images, true_box, true_label = q.get()

            loct, conft = np_utils.get_loc_conf(true_box, true_label, batch_size=config.batch_size,cfg=config.Config)
            feed_dict = {img: images, loc: loct,
                         conf: conft}

            ls, step = sess.run([train_op, global_step], feed_dict=feed_dict)

            if step % 10 == 0:
                print('step:' + str(step) +
                      ' ' + 'class_loss:' + str(ls[0]) +
                      ' ' + 'loc_loss:' + str(ls[1])
                      )
                summaries = sess.run(sum_op, feed_dict=feed_dict)
                sv.summary_computed(sess, summaries)
Example #3
def train():
    global_step = tf.Variable(0, trainable=False)
    dataset = coco_input.get_dataset()
    labels, images = dataset.train_input()

    network = model.Network(is_train=True)
    logits = network.inference(images)

    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    entropy, loss = model.get_loss(labels, logits)

    lr, opt = get_opt(loss, global_step)
    summary_op = tf.merge_all_summaries()

    #gpu_options = tf.GPUOptions(allow_growth=True)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        init = tf.initialize_all_variables()
        sess.run(init)
        if FLAGS.dir_pretrain is not None:
            saver = tf.train.Saver(model.get_pretrain_variables())
            restore_model(saver, sess)

        summary_writer = tf.train.SummaryWriter("log", sess.graph)

        tf.train.start_queue_runners(sess=sess)
        saver = tf.train.Saver(model.get_restore_variables())

        for num_iter in range(1, FLAGS.max_steps + 1):
            start_time = time.time()
            value_entropy, value_loss, value_lr, _ = sess.run(
                [entropy, loss, lr, opt])
            duration = time.time() - start_time
            assert not np.isnan(value_loss), 'Model diverged with loss = NaN'

            if num_iter % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                print(
                    "step = {} entropy = {:.2f} loss = {:.2f} ({:.1f} examples/sec; {:.1f} sec/batch)"
                    .format(num_iter, value_entropy, value_loss,
                            examples_per_sec, sec_per_batch))

            if num_iter % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, num_iter)

            if num_iter % 1000 == 0:
                print "lr = {:.2f}".format(value_lr)
                checkpoint_path = os.path.join(FLAGS.dir_parameter,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=num_iter)
Example #4
def evaluate():
    is_training = False

    with tf.device('/gpu:' + str(GPU_INDEX)):
        pointclouds_pl, labels_pl = placeholder_inputs(BATCH_SIZE, NUM_POINT)
        is_training_pl = tf.placeholder(tf.bool, shape=())

        # simple model
        pred = get_model(pointclouds_pl, is_training_pl)
        loss = get_loss(pred, labels_pl)
        pred_softmax = tf.nn.softmax(pred)

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()

    # Create a session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.log_device_placement = True
    sess = tf.Session(config=config)

    # Restore variables from disk.
    saver.restore(sess, MODEL_PATH)
    log_string("Model restored.")

    ops = {
        'pointclouds_pl': pointclouds_pl,
        'labels_pl': labels_pl,
        'is_training_pl': is_training_pl,
        'pred': pred,
        'pred_softmax': pred_softmax,
        'loss': loss
    }

    total_correct = 0
    total_seen = 0
    fout_out_filelist = open(FLAGS.output_filelist, 'w')
    for room_path in ROOM_PATH_LIST:
        out_data_label_filename = os.path.basename(
            room_path)[:-4] + '_pred.txt'
        out_data_label_filename = os.path.join(DUMP_DIR,
                                               out_data_label_filename)
        out_gt_label_filename = os.path.basename(room_path)[:-4] + '_gt.txt'
        out_gt_label_filename = os.path.join(DUMP_DIR, out_gt_label_filename)
        print(room_path, out_data_label_filename)
        a, b = eval_one_epoch(sess, ops, room_path, out_data_label_filename,
                              out_gt_label_filename)
        total_correct += a
        total_seen += b
        fout_out_filelist.write(out_data_label_filename + '\n')
    fout_out_filelist.close()
    log_string('all room eval accuracy: %f' %
               (total_correct / float(total_seen)))
Example #5
def main(config):
    import torch
    from model import get_model, get_loss, get_converter, get_post_processing
    from metric import get_metric
    from data_loader import get_dataloader
    from tools.rec_trainer import RecTrainer as rec
    from tools.det_trainer import DetTrainer as det
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl",
            init_method="env://",
            world_size=torch.cuda.device_count(),
            rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank
    train_loader = get_dataloader(config['dataset']['train'],
                                  config['distributed'])
    assert train_loader is not None
    if 'validate' in config['dataset']:
        validate_loader = get_dataloader(config['dataset']['validate'], False)
    else:
        validate_loader = None

    criterion = get_loss(config['loss']).cuda()

    if config.get('post_processing', None):
        post_p = get_post_processing(config['post_processing'])
    else:
        post_p = None

    metric = get_metric(config['metric'])

    if config['arch']['algorithm'] == 'rec':
        converter = get_converter(config['converter'])
        config['arch']['num_class'] = len(converter.character)
        model = get_model(config['arch'])
    else:
        converter = None
        model = get_model(config['arch'])

    trainer = eval(config['arch']['algorithm'])(
        config=config,
        model=model,
        criterion=criterion,
        train_loader=train_loader,
        post_process=post_p,
        metric=metric,
        validate_loader=validate_loader,
        converter=converter)
    trainer.train()
Example #6
def test():
    with tf.Graph().as_default():
        with tf.device('/gpu:0'):
            src_mesh = model.mesh_placeholder_inputs(BATCH_SIZE, MAX_NVERTS,
                                                     MAX_NTRIS, NUM_POINTS,
                                                     'src')
            ref_mesh = model.mesh_placeholder_inputs(BATCH_SIZE, MAX_NVERTS,
                                                     MAX_NTRIS, NUM_POINTS,
                                                     'ref')

            is_training_pl = tf.placeholder(tf.bool, shape=())
            print(is_training_pl)

            print("--- Get model")
            end_points = model.get_model(src_mesh, ref_mesh, NUM_POINTS,
                                         is_training_pl)
            loss, end_points = model.get_loss(end_points, NUM_PART_CATEGORIES)
            # Add ops to save and restore all the variables.

            # Create a session
            # with tf.device('/gpu:0'):
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.allow_soft_placement = True
            config.log_device_placement = False
            sess = tf.Session(config=config)
            # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

            # Init variables
            init = tf.global_variables_initializer()
            sess.run(init)

            saver = tf.train.Saver()
            ckptstate = tf.train.get_checkpoint_state(PRETRAINED_MODEL_PATH)
            if ckptstate is not None:
                LOAD_MODEL_FILE = os.path.join(
                    PRETRAINED_MODEL_PATH,
                    os.path.basename(ckptstate.model_checkpoint_path))
                saver.restore(sess, LOAD_MODEL_FILE)
                print("Model loaded in file: %s" % LOAD_MODEL_FILE)

            else:
                print("Fail to load modelfile: %s" % PRETRAINED_MODEL_PATH)
                return

            ops = {
                'src_mesh': src_mesh,
                'ref_mesh': ref_mesh,
                'is_training_pl': is_training_pl,
                'end_points': end_points
            }

            test_(sess, ops)
Example #7
def train():

    start = time()
    file_train = h5py.File("data/3DMNIST/train_point_clouds.h5", "r")
    file_test = h5py.File("data/3DMNIST/test_point_clouds.h5", "r")
    data_train = load_data(file_train, N)
    data_test = load_data(file_test, NE)
    log('Data loaded in %.2fs' % (time() - start))

    with tf.Graph().as_default():

        is_training = tf.placeholder(tf.bool, shape=())
        inputs, labels = placeholder(B, N)
        pred = get_model(inputs, is_training, k=10, s=S, use_tnet=TNET, bn_mom=BN_MOM)
        loss = get_loss(pred, labels)

        learning_rate = tf.placeholder(tf.float32, shape=[])
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss)

        saver = tf.train.Saver()

        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)

        ops = {
            'inputs': inputs,
            'labels': labels,
            'is_training': is_training,
            'pred': pred,
            'loss': loss,
            'train_op': train_op,
            'learning_rate': learning_rate,
        }

        log('\nStart training\n')
        start = time()

        for ep in range(MAX_EPOCH):
            if ep == 20:
                global LR
                LR /= 10
            log("#### EPOCH {:03} ####".format(ep + 1))
            begin = time()
            train_one_epoch(data_train, sess, ops)
            log("---- Time elapsed: {:.2f}s".format(time() - begin))
            eval_one_epoch(data_test, sess, ops)
            save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))

        log("Total time: {:.2f}s".format(time() - start))
Example #8
def train():
    img = tf.placeholder(shape=[
        config.batch_size, config.Config['min_dim'], config.Config['min_dim'],
        3
    ],
                         dtype=tf.float32)
    anchors_num = sum([
        config.Config['feature_maps'][s]**2 * config.Config['aspect_num'][s]
        for s in range(6)
    ])

    loc = tf.placeholder(shape=[config.batch_size, anchors_num, 4],
                         dtype=tf.float32)
    conf = tf.placeholder(shape=[config.batch_size, anchors_num],
                          dtype=tf.float32)

    pred_loc, pred_confs, vbs = inception_500_ince.inception_v2_ssd(
        img, config)

    train_tensors, sum_op = get_loss(conf, loc, pred_loc, pred_confs, config)

    gen = data_gen.get_batch_inception(batch_size=config.batch_size,
                                       image_size=config.Config['min_dim'],
                                       max_detect=50)
    optimizer = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
    train_op = slim.learning.create_train_op(train_tensors, optimizer)

    saver = tf.train.Saver(vbs)

    def restore(sess):
        saver.restore(sess, '/home/dsl/all_check/inception_v2.ckpt')

    sv = tf.train.Supervisor(logdir='/home/dsl/all_check/face_detect/voc-1',
                             summary_op=None,
                             init_fn=restore)

    with sv.managed_session() as sess:
        for step in range(1000000000):

            images, true_box, true_label = q.get()

            loct, conft = np_utils.get_loc_conf(true_box,
                                                true_label,
                                                batch_size=config.batch_size,
                                                cfg=config.Config)
            feed_dict = {img: images, loc: loct, conf: conft}

            ls = sess.run(train_op, feed_dict=feed_dict)
            if step % 10 == 0:
                summaries = sess.run(sum_op, feed_dict=feed_dict)
                sv.summary_computed(sess, summaries)
                print(ls)
Example #9
def train():
    img = tf.placeholder(shape=[config.batch_size, config.Config['min_dim'], config.Config['min_dim'], 3], dtype=tf.float32)
    anchors_num = sum(
        [config.Config['feature_maps'][s] ** 2 * config.Config['aspect_num'][s] for s in range(6)])

    loc = tf.placeholder(shape=[config.batch_size, anchors_num, 4], dtype=tf.float32)
    conf = tf.placeholder(shape=[config.batch_size, anchors_num], dtype=tf.float32)

    pred_loc, pred_confs, vbs = inceptionv3_500_ince.inception_v2_ssd(img,config)


    train_tensors = get_loss(conf, loc, pred_loc, pred_confs,config)
    global_step = get_or_create_global_step()

    # Define your exponentially decaying learning rate
    lr = tf.train.exponential_decay(
        learning_rate=0.001,
        global_step=global_step,
        decay_steps=20000,
        decay_rate=0.7,
        staircase=True)
    tf.summary.scalar('lr',lr)
    sum_op = tf.summary.merge_all()

    gen = data_gen.get_batch_inception(batch_size=config.batch_size,image_size=config.Config['min_dim'],max_detect=50)
    optimizer = tf.train.MomentumOptimizer(learning_rate=lr,momentum=0.9)
    train_op = slim.learning.create_train_op(train_tensors, optimizer)

    saver = tf.train.Saver(vbs)

    def restore(sess):
        saver.restore(sess, '/home/dsl/all_check/inception_v3.ckpt')

    sv = tf.train.Supervisor(logdir='/home/dsl/all_check/face_detect/voc-v32', summary_op=None, init_fn=restore)

    with sv.managed_session() as sess:
        for step in range(1000000000):

            images, true_box, true_label = q.get()

            loct, conft = np_utils.get_loc_conf(true_box, true_label, batch_size=config.batch_size,cfg=config.Config)
            feed_dict = {img: images, loc: loct,
                         conf: conft}
            t = time.time()
            ls,step = sess.run([train_op,global_step], feed_dict=feed_dict)
            if step % 10 == 0:
                print(time.time()-t)
                summaries = sess.run(sum_op, feed_dict=feed_dict)
                sv.summary_computed(sess, summaries)
                print(ls)
Example #10
def initTF():
    global tf_session, ops
    with tf.device('/gpu:0'):
        pointclouds_pl, labels_pl = model.placeholder_inputs(
            BATCH_SIZE, NUM_POINTS)
        is_training_pl = tf.placeholder(tf.bool, shape=())
        pred, end_points = model.get_model(pointclouds_pl, is_training_pl)
        model.get_loss(pred, labels_pl, end_points)
        losses = tf.get_collection('losses')
        total_loss = tf.add_n(losses, name='total_loss')
        saver = tf.train.Saver()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        tf_session = tf.Session(config=config)
        model_path = "/home/gstavrinos/libs/python/python2.7/pointnet2/log/model.ckpt"
        saver.restore(tf_session, model_path)
        ops = {
            "pointclouds_pl": pointclouds_pl,
            "is_training_pl": is_training_pl,
            "pred": pred
        }
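A hypothetical usage of the globals populated by initTF(); the zero-filled input array is only a placeholder, and the exact output shape depends on the PointNet++ variant in use:

# Sketch: run inference through the session and ops set up by initTF().
import numpy as np

initTF()
points = np.zeros((BATCH_SIZE, NUM_POINTS, 3), dtype=np.float32)  # dummy point clouds
pred_val = tf_session.run(ops["pred"],
                          feed_dict={ops["pointclouds_pl"]: points,
                                     ops["is_training_pl"]: False})
pred_classes = np.argmax(pred_val, axis=-1)  # highest-scoring class indices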
Example #11
def evaluate():
    with tf.Graph().as_default() as g, tf.device("/gpu:0"):
        FLAGS.batch_size = 100
        images, labels = mnist_input.validate_input()
        label_vector = tf.one_hot(labels, 10, dtype=tf.float32)

        network = model.Network()
        logits = network.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        entropy, loss = model.get_loss(label_vector, logits)

        summary_writer = tf.train.SummaryWriter(FLAGS.dir_log, g)

        while True:
            eval_once(summary_writer, top_k_op, entropy)
            time.sleep(FLAGS.eval_interval_secs)
Example #12
 def _pipeline(self, config, inputs):
     if config.model == 'cbow':
         model_func = cbow_forward
     elif config.model == 'rnn':
         model_func = rnn_forward
     elif config.model == 'att':
         model_func = attention_forward
     else:
         raise NotImplementedError()
     self.variables, outputs = model_func(config, inputs)
     loss, grads = None, None
     if config.supervised:
         loss = get_loss(config, inputs, outputs)
         if config.is_train:
             grads = self.opt.compute_gradients(loss)
     return outputs, loss, grads
Example #13
def evaluate():
    with tf.Graph().as_default() as g, tf.device("/gpu:0"):
        dataset = coco_input.get_dataset()
        labels, images = dataset.validate_input()

        network = model.Network(is_train=False)
        logits = network.inference(images)
        entropy, _ = model.get_loss(labels, logits)

        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        summary_writer = tf.train.SummaryWriter(FLAGS.dir_log_val, g)

        while True:
            eval_once(summary_writer, top_k_op, entropy)
            time.sleep(FLAGS.eval_interval_secs)
Example #14
def train():
    with tf.Graph().as_default():
        features, labels = model.placeholder_inputs(BATCH_SIZE, NUM_FEATURES)

        pred = model.get_model(features)
        # with tf.name_scope('loss') as scope:
        loss = model.get_loss(pred, labels)
        tf.summary.scalar('loss', loss)

        total, count = tf.metrics.accuracy(labels=tf.to_int64(labels),
                                           predictions=tf.argmax(pred, 1),
                                           name='accuracy')
        tf.summary.scalar('accuracy', count)

        # Get training operator
        optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
        train_op = optimizer.minimize(loss)

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()

        # Create a session
        sess = tf.Session()

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)

        # Init variables
        init = tf.global_variables_initializer()
        local = tf.local_variables_initializer()
        sess.run(init)
        sess.run(local)

        for epoch in range(NUM_EPOCHS):
            data, label = preprocess.load_data()

            feed_dict = {features: data, labels: label}
            summary, _, loss_val, pred_val, accurate = sess.run(
                [merged, train_op, loss, pred, count], feed_dict=feed_dict)
            train_writer.add_summary(summary, epoch)
            print(accurate)

            save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
    return
Example #15
def train():
    img = tf.placeholder(
        shape=[config.batch_size, cfg["min_dim"], cfg["min_dim"], 3], dtype=tf.float32
    )
    anchors_num = sum(
        [cfg["feature_maps"][s] ** 2 * cfg["aspect_num"][s] for s in range(6)]
    )

    loc = tf.placeholder(shape=[config.batch_size, anchors_num, 4], dtype=tf.float32)
    conf = tf.placeholder(shape=[config.batch_size, anchors_num], dtype=tf.float32)

    pred_loc, pred_confs, vbs = mobile.nana_mobile(img, config)

    train_tensors, sum_op = get_loss(conf, loc, pred_loc, pred_confs, config)

    gen = data_gen.get_batch(batch_size=config.batch_size, image_size=cfg["min_dim"])
    optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
    train_op = slim.learning.create_train_op(train_tensors, optimizer)

    saver = tf.train.Saver(vbs)

    def restore(sess):
        saver.restore(sess, "/home/dsl/all_check/nasnet-a_mobile_04_10_2017/model.ckpt")

    sv = tf.train.Supervisor(
        logdir="/home/dsl/all_check/face_detect/nana", summary_op=None, init_fn=restore
    )

    with sv.managed_session() as sess:
        for step in range(1000000000):

            images, true_box, true_label = next(gen)
            loct, conft = np_utils.get_loc_conf(
                true_box, true_label, batch_size=config.batch_size, cfg=cfg
            )
            feed_dict = {img: images, loc: loct, conf: conft}

            ls = sess.run(train_op, feed_dict=feed_dict)
            if step % 10 == 0:
                summaries = sess.run(sum_op, feed_dict=feed_dict)
                sv.summary_computed(sess, summaries)
                print(ls)
Example #16
def train():
    global_step = tf.Variable(0, trainable=False)

    image, label = mnist_input.train_input()

    network = model.Network()
    logits = network.inference(image, is_train=True)

    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    entropy, loss = model.get_loss(label, logits)

    lr, opt = get_opt(loss, global_step)

    saver = tf.train.Saver(tf.trainable_variables())
    summary_op = tf.merge_all_summaries()

    gpu_options = tf.GPUOptions(allow_growth=True)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        init = tf.initialize_all_variables()
        sess.run(init)
        summary_writer = tf.train.SummaryWriter("log", sess.graph)

        tf.train.start_queue_runners(sess=sess)

        for num_iter in range(1, 1000000):
            value_entropy, value_loss, value_lr, _ = sess.run(
                [entropy, loss, lr, opt])

            if num_iter % 100 == 0:
                print "lr = {}  entropy = {} loss = {}".format(
                    value_lr, value_entropy, value_loss)
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, num_iter)

            if num_iter % 1000 == 0:

                checkpoint_path = os.path.join(FLAGS.dir_parameter,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=num_iter)
Example #17
def train_step(train_data,optimizer,dev_data):
    model.train()
    count=0
    total_loss=0
    for j in range(0, len(train_data), config.batch_size):
        optimizer.zero_grad()
        print("run bactch : % d" % j)
        batch = train_data[j:j + config.batch_size]
        sentence_tensor, tags_tensor,length_tensor=get_batch(batch)
        loss=model.get_loss(sentence_tensor,tags_tensor,length_tensor)
        loss.backward()
        optimizer.step()
        print("minibatch : %d , loss : %.5f " % (j,loss.item()))
        total_loss+=loss.item()
        count+=1
    print("-------------------------------------------------------------")
    print("avg loss : %.5f"%(total_loss/count))
    print("-------------------------------------------------------------")
    f1=dev_step(dev_data)
    return f1
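A hypothetical driver around train_step; the optimizer choice, the field names config.lr / config.num_epochs, and the checkpoint path are assumptions, not part of the original script:

# Sketch: outer training loop that calls train_step once per epoch.
import torch

optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
best_f1 = 0.0
for epoch in range(config.num_epochs):
    f1 = train_step(train_data, optimizer, dev_data)
    if f1 > best_f1:
        best_f1 = f1
        torch.save(model.state_dict(), "best_model.pt")  # keep the best dev checkpoint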
Example #18
def model_fn(features, labels, mode, params):
    if mode == tf.estimator.ModeKeys.PREDICT:
        raise RuntimeError("mode {} is not supported yet".format(mode))

    loss = get_loss(features, labels, args)

    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = tf.compat.v1.train.exponential_decay(
            args.lr,
            tf.compat.v1.train.get_global_step(),
            decay_steps=100000,
            decay_rate=0.96)

        optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=learning_rate)

        if args.use_tpu:
            optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer)

        return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=optimizer.minimize(loss, tf.compat.v1.train.get_global_step()))
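A model_fn of this shape is normally handed to a TPUEstimator; a minimal wiring sketch, where args.model_dir, args.batch_size, args.max_steps and train_input_fn are assumptions and the TPU cluster settings are omitted:

# Sketch: constructing a TPUEstimator around the model_fn above.
run_config = tf.compat.v1.estimator.tpu.RunConfig(model_dir=args.model_dir)
estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
    model_fn=model_fn,
    config=run_config,
    use_tpu=args.use_tpu,
    train_batch_size=args.batch_size)
estimator.train(input_fn=train_input_fn, max_steps=args.max_steps)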
Example #19
 def validation(self, data_valid, step, data, loss_weight_base, value_weight, value_ratio):
     self.model.eval()
     running_valid_loss = 0
     for inp, out, out_real, lens in data_valid:
         loss, y_p = forecast_model.get_loss(inp=inp,
                                             out=out,
                                             lens=lens,
                                             cuda=True,
                                             gn=self.model,
                                             glucose_dat=data,
                                             criterion=self.criterion,
                                             base=loss_weight_base,
                                             out_real=out_real,
                                             value_weight=value_weight,
                                             value_ratio=value_ratio)
         step += 1
         running_valid_loss += loss.data.cpu().numpy()[0]
     running_valid_loss = running_valid_loss / len(data_valid)
     print('validation loss: {:.3f}'.format(running_valid_loss))
     self.writer.add_scalar(tag='valid_total_loss',
                            scalar_value=running_valid_loss,
                            global_step=step)
     self.model.train()
     return running_valid_loss
Example #20
    def train_sup(self, epoch_lim, data, valid_data, early_stopping_lim,
                  batch_size, num_workers, track_embeddings, validation_rate, loss_weight_base=1,
                  value_weight=0, value_ratio=0):
        """
        Training loop
        :param epoch_lim: total number of training epochs
        :param data: training data
        :param valid_data: validation data
        :param early_stopping_lim: Number of epochs to run without validation improvement before stopping
        if None, never stop early
        :param batch_size: training batch_size
        :param num_workers: number of CPU workers to use for data loading
        :param track_embeddings: Save out embedding information at end of run
        :param validation_rate: Check validation performance every validation_rate training epochs
        :param loss_weight_base: A constant between 0 and 1 used to interpolate between Single (=0) and Multi (=1) Step forecasting.
        :param value_weight: A constant multiplier for the real-value loss, set to 0 in the paper
        :param value_ratio: The proportion of loss used for the MSE loss term (as opposed for the cross-entropy loss), set to 0 in the paper
        :return loss array, model:
        """
        if early_stopping_lim is None:
            early_stopping_lim = epoch_lim
        train_sampler = sampler.RandomSampler(np.arange(len(data)))
        data_train = DataLoader(data,
                                batch_size=batch_size,
                                sampler=train_sampler,
                                drop_last=True)

        valid_sampler = sampler.SequentialSampler(np.arange(len(valid_data)))
        data_valid = DataLoader(valid_data,
                                batch_size=batch_size,
                                sampler=valid_sampler)
        step = 0

        bsf_loss = np.inf
        epochs_without_improvement = 0
        improvements = []
        for epoch in range(epoch_lim):
            if epochs_without_improvement > early_stopping_lim:
                print('Exceeded early stopping limit, stopping')
                break
            if epoch % validation_rate == 0:
                valid_loss = self.validation(data_valid=data_valid,
                                             step=step,
                                             data=data,
                                             loss_weight_base=loss_weight_base,
                                             value_weight=value_weight, value_ratio=value_ratio)
                (bsf_loss,
                 epochs_without_improvement,
                 improvements) = self.manage_early_stopping(bsf_loss=bsf_loss,
                                                            early_stopping_lim=early_stopping_lim,
                                                            epochs_without_improvement=epochs_without_improvement,
                                                            valid_loss=valid_loss, validation_rate=validation_rate,
                                                            improvements=improvements)
            running_train_loss = 0
            for inp, out, out_real, lens in tqdm(data_train):
                loss, y_p = forecast_model.get_loss(inp=inp,
                                                    out=out,
                                                    lens=lens,
                                                    cuda=True,
                                                    gn=self.model,
                                                    glucose_dat=data,
                                                    criterion=self.criterion,
                                                    base=loss_weight_base,
                                                    out_real=out_real,
                                                    value_weight=value_weight,
                                                    value_ratio=value_ratio)
                step += 1
                running_train_loss += loss.data.cpu().numpy()[0]
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            running_train_loss = running_train_loss/len(data_train)
            self.writer.add_scalar(tag='train_loss',
                                   scalar_value=running_train_loss,
                                   global_step=step)
        torch.save(self.model.state_dict(), '{}/final_sup.pt'.format(self.model_dir))
        if track_embeddings:
            self.embed(data_valid, step, embed_batch=100)
        return improvements
Example #21
def main():
    args = parse_args()
    print("Params:")
    print(args)
    print()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    X = tf.placeholder(tf.float32, [17770, None], name='X')
    Y = tf.placeholder(tf.float32, [17770, None], name='Y')
    Yhat, weights = model.autoencoder(X,
                                      args.layers,
                                      keep_prob=(1.0 - args.dropout),
                                      constrained=args.constrained)
    YhatDev, weights = model.autoencoder(X,
                                         args.layers,
                                         constrained=args.constrained,
                                         weights=weights)
    loss = model.get_loss(Y, Yhat)
    loss_sum, loss_examples = model.get_test_loss(Y, Yhat)
    loss_sum_dev, loss_examples_dev = model.get_test_loss(Y, YhatDev)
    losses = (loss, loss_sum, loss_examples, loss_sum_dev, loss_examples_dev)
    optimizer = model.get_optimizer(args.optimizer_type, args.lr,
                                    args.momentum)

    if args.small_dataset:
        train_path = "../data/netflix/output_small_train"
        dev_path = "../data/netflix/output_small_dev"
        test_path = "../data/netflix/output_small_test"
    else:
        train_path = "../data/netflix/output_train"
        dev_path = "../data/netflix/output_dev"
        test_path = "../data/netflix/output_test"

    data_train = data_manager.Data(size=args.chunk_size,
                                   batch=args.batch_size,
                                   path=train_path)
    data_dev = data_manager.Data(size=args.chunk_size,
                                 batch=args.batch_size,
                                 path=dev_path,
                                 test=True)
    data_test = data_manager.Data(size=args.chunk_size,
                                  batch=args.batch_size,
                                  path=test_path,
                                  test=True)

    train_losses, eval_losses = model.train(
        data_train,
        data_dev,
        losses,
        optimizer,
        X,
        Y,
        Yhat,
        epochs=args.epochs,
        dense_refeeding=args.dense_refeeding)

    model.test(data_test, X, Y, YhatDev)

    t, = plt.plot([i + 1 for i in range(len(train_losses))],
                  train_losses,
                  label="Train")
    e, = plt.plot([i + 1 for i in range(len(eval_losses))],
                  eval_losses,
                  label="Dev")
    plt.legend(handles=[t, e])
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.show()

    print([i + 1 for i in range(len(train_losses))])
    print(train_losses)

    print([i + 1 for i in range(len(eval_losses))])
    print(eval_losses)
Example #22
def train(rank, world_size, args):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = args.port
    dist.init_process_group('nccl', rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)

    tokenizer = BertTokenizer.from_pretrained('bert-large-cased')
    train_data = MultiTaskDataset([get_data(t, 'train') for t in args.tasks])
    train_sampler = DistMultiTaskBatchSampler(train_data,
                                              args.batch_size,
                                              drop_last=True,
                                              rank=rank,
                                              world_size=world_size)
    train_loader = DataLoader(
        train_data,
        batch_sampler=train_sampler,
        collate_fn=lambda x: collate(tokenizer, x, args.max_length),
        pin_memory=True)
    dev_data = MultiTaskDataset([get_data(t, 'dev') for t in args.tasks])
    dev_sampler = DistMultiTaskBatchSampler(dev_data,
                                            args.batch_size,
                                            drop_last=False,
                                            rank=rank,
                                            world_size=world_size)
    dev_loader = DataLoader(dev_data,
                            batch_sampler=dev_sampler,
                            collate_fn=lambda x: collate(tokenizer, x),
                            pin_memory=True)

    model = BertMultiTask([get_n_classes(t) for t in args.tasks],
                          [get_loss(t) for t in args.tasks]).cuda()
    model = DDP(model, device_ids=[rank], find_unused_parameters=True)

    optimizer = torch.optim.Adamax(model.parameters(), args.lr)
    step = 0
    if rank == 0:
        writer = SummaryWriter(args.log_dir)

    for epoch in range(args.n_epoch):
        model.train()
        batch_time = data_time = total_loss = 0
        start = time()
        for b, (inputs, labels, task_id) in enumerate(train_loader):
            step += 1
            inputs = {key: inputs[key].cuda() for key in inputs}
            labels = labels.cuda()
            data_time += time() - start

            logits, loss = model(inputs, task_id, labels)
            dist.reduce(loss, 0)
            total_loss += loss.item() / world_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = torch.zeros(len(args.tasks)).cuda()
            acc_sub = torch.tensor(0).cuda()
            task = torch.zeros(len(args.tasks)).cuda()
            task[task_id] = 1
            with torch.no_grad():
                if args.tasks[task_id] == 5:
                    correct = (logits > 0) == labels.bool()
                    acc[task_id] = correct.float().mean()
                    acc_sub = correct.all(dim=1).float().mean()
                else:
                    acc[task_id] = (logits.argmax(
                        dim=1) == labels).float().mean()
            dist.reduce(acc, 0)
            dist.reduce(acc_sub, 0)
            dist.reduce(task, 0)

            if rank == 0:
                writer.add_scalar(f'train/loss', loss.item(), step)
                for i, t in enumerate(args.tasks):
                    if task[i] > 0:
                        name = get_task_name(t)
                        writer.add_scalar(f'train/{name}_acc',
                                          acc[i] / task[i] * 100, step)
                        if t == 5:
                            writer.add_scalar(f'train/{name}_acc_sub',
                                              acc_sub / task[i] * 100, step)

                if (b + 1) % args.print_freq == 0:
                    print(
                        f'Epoch {epoch+1} Train: {(b+1):05d}/{len(train_loader):05d} ' \
                        f'Batch {(batch_time/(b+1)):.3f}s Data {(data_time/(b+1)):.3f}s ' \
                        f'Loss {(total_loss/(b+1)):.4f}'
                    )

            batch_time += time() - start
            start = time()

        model.eval()
        batch_time = data_time = 0
        all_correct = torch.tensor(0).cuda()
        correct = torch.zeros(len(args.tasks)).cuda()
        total = torch.zeros(len(args.tasks)).cuda()
        tp = [torch.zeros(get_n_classes(t)).cuda() for t in args.tasks]
        fp = [torch.zeros(get_n_classes(t)).cuda() for t in args.tasks]
        fn = [torch.zeros(get_n_classes(t)).cuda() for t in args.tasks]
        start = time()
        for inputs, labels, task_id in dev_loader:
            inputs = {key: inputs[key].cuda() for key in inputs}
            labels = labels.cuda()
            data_time += time() - start

            with torch.no_grad():
                logits = model(inputs, task_id)

                if args.tasks[task_id] == 5:
                    correct[task_id] += ((logits >
                                          0) == labels.bool()).float().mean(
                                              dim=1).sum()
                    all_correct += ((logits > 0) == labels.bool()).all(
                        dim=1).sum()
                    tp[task_id] += ((logits > 0) & labels.bool()).sum(dim=0)
                    fp[task_id] += ((logits > 0) & ~labels.bool()).sum(dim=0)
                    fn[task_id] += ((logits <= 0) & labels.bool()).sum(dim=0)
                else:
                    for p, l in zip(logits.argmax(dim=1), labels):
                        if p == l:
                            correct[task_id] += 1
                            tp[task_id][p] += 1
                        else:
                            fp[task_id][p] += 1
                            fn[task_id][l] += 1
                total[task_id] += labels.shape[0]

            batch_time += time() - start
            start = time()

        dist.reduce(all_correct, 0)
        dist.reduce(correct, 0)
        dist.reduce(total, 0)
        for tpl, fpl, fnl in zip(tp, fp, fn):
            dist.reduce(tpl, 0)
            dist.reduce(fpl, 0)
            dist.reduce(fnl, 0)

        if rank == 0:
            l = len(dev_loader)
            str_out = f'Epoch {epoch+1}  Dev : {l:05d}/{l:05d} ' \
                      f'Batch {(batch_time/l):.3f}s Data {(data_time/l):.3f}s '
            for i, t in enumerate(args.tasks):
                name = get_task_name(t)
                acc = correct[i] / total[i] * 100
                writer.add_scalar(f'dev/{name}_acc', acc, step)
                f1 = (tp[i] / (tp[i] + (fp[i] + fn[i]) / 2)).mean() * 100
                writer.add_scalar(f'dev/{name}_f1', f1, step)
                if t == 5:
                    acc_sub = all_correct / total[i] * 100
                    writer.add_scalar(f'dev/{name}_acc_sub', acc_sub, step)
                    str_out += f'{name} Acc {acc_sub.item():.2f} F1 {f1.item():.2f} '
                else:
                    str_out += f'{name} Acc {acc.item():.2f} F1 {f1.item():.2f} '
            print(str_out)

    if rank == 0:
        torch.save(model.module.state_dict(),
                   f'{args.log_dir}/epoch_{epoch+1}.pth')

    dist.destroy_process_group()
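Because train(rank, world_size, args) initializes its own process group, it is typically launched once per GPU; a minimal launcher sketch (parse_args is assumed to come from the original script):

# Sketch: spawn one training process per visible GPU.
import torch
import torch.multiprocessing as mp

if __name__ == '__main__':
    args = parse_args()  # assumed CLI parser from the original script
    world_size = torch.cuda.device_count()
    mp.spawn(train, args=(world_size, args), nprocs=world_size, join=True)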
Example #23
batch_size = 70
images, y_ = input.input_pipeline(filenames, labels, batch_size)

sess = tf.Session()

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

with tf.variable_scope("model") as scope:
    adver_y = model.model(images, False)

shifted_y_ = tf.concat(
    1, [tf.slice(y_, [0, 1], [-1, 1]),
        tf.slice(y_, [0, 0], [-1, 1])])
adver_loss = model.get_loss(adver_y, shifted_y_)

grad = tf.gradients(adver_loss, images)[0]

#scale_grad = tf.abs(tf.truncated_normal(shape=grad.get_shape(), stddev=.01))
update_prob = .1
update_mag = .01
scale_grad = tf.to_float(
    tf.random_uniform(shape=[batch_size]) > update_prob) * update_mag
grad_shape = grad.get_shape().as_list()
scale_grad = tf.tile(scale_grad,
                     [grad_shape[1] * grad_shape[2] * grad_shape[3]])
scale_grad = tf.reshape(scale_grad, grad_shape[1:4] + [batch_size])
scale_grad = tf.transpose(scale_grad, [3, 0, 1, 2])
update = -tf.mul(tf.sign(grad), scale_grad)
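The snippet builds a signed-gradient perturbation (FGSM-style) but stops before applying it; one way to finish the step, assuming pixel values in [0, 1], is sketched below:

# Sketch: apply the perturbation and evaluate the adversarial batch (value range assumed).
adver_images = tf.clip_by_value(images + update, 0.0, 1.0)
adv_batch, loss_val = sess.run([adver_images, adver_loss])
print('adversarial loss:', loss_val)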
Example #24
                         num_workers=4)
testloader = DataLoader(dataset=test_dataset,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        num_workers=4)

# ============================ step 2/6 model ============================
if args.pointnet:
    net = get_model(40, normal_channel=args.normal)
else:
    net = cls_3d()
net.to(device)

# ============================ step 3/6 loss function ============================
if args.pointnet:
    criterion = get_loss()  # negative log-likelihood loss
else:
    criterion = nn.CrossEntropyLoss()  # choose the loss function

# ============================ step 4/6 optimizer ============================
# choose the optimizer
if args.optim == 'sgd':
    optimizer = optim.SGD(net.parameters(),
                          lr=LR,
                          momentum=0.9,
                          weight_decay=L2_REG)
elif args.optim == 'adagrad':
    optimizer = optim.Adagrad(net.parameters(), lr=LR, weight_decay=L2_REG)
elif args.optim == 'rmsprop':
    optimizer = optim.RMSprop(net.parameters(),
                              lr=LR,
Example #25
def train():
    """Train the model on a single GPU
    """
    with tf.Graph().as_default():
        stacker, stack_validation, stack_train = init_stacking()

        with tf.device("/gpu:" + str(PARAMS["gpu"])):
            pointclouds_pl, labels_pl, smpws_pl = model.get_placeholders(
                PARAMS["num_point"], hyperparams=PARAMS)
            is_training_pl = tf.compat.v1.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch' parameter for
            # you every time it trains.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar("bn_decay", bn_decay)

            print("--- Get model and loss")
            # Get model and loss
            pred, end_points = model.get_model(
                pointclouds_pl,
                is_training_pl,
                NUM_CLASSES,
                hyperparams=PARAMS,
                bn_decay=bn_decay,
            )
            loss = model.get_loss(pred, labels_pl, smpws_pl, end_points)
            tf.summary.scalar("loss", loss)

            # Compute accuracy
            correct = tf.equal(tf.argmax(pred, 2),
                               tf.compat.v1.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(
                PARAMS["batch_size"] * PARAMS["num_point"])
            tf.summary.scalar("accuracy", accuracy)

            # Compute mean intersection over union
            mean_intersection_over_union, update_iou_op = tf.compat.v1.metrics.mean_iou(
                tf.compat.v1.to_int32(labels_pl),
                tf.compat.v1.to_int32(tf.argmax(pred, 2)), NUM_CLASSES)
            tf.summary.scalar(
                "mIoU", tf.compat.v1.to_float(mean_intersection_over_union))

            print("--- Get training operator")
            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar("learning_rate", learning_rate)
            if PARAMS["optimizer"] == "momentum":
                optimizer = tf.train.MomentumOptimizer(
                    learning_rate, momentum=PARAMS["momentum"])
            else:
                assert PARAMS["optimizer"] == "adam"
                optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.compat.v1.train.Saver()

        # Create a session
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.compat.v1.Session(config=config)

        # Add summary writers
        merged = tf.compat.v1.summary.merge_all()
        train_writer = tf.compat.v1.summary.FileWriter(
            os.path.join(PARAMS["logdir"], "train"), sess.graph)
        validation_writer = tf.compat.v1.summary.FileWriter(
            os.path.join(PARAMS["logdir"], "validation"), sess.graph)

        # Init variables
        sess.run(tf.compat.v1.global_variables_initializer())
        sess.run(
            tf.compat.v1.local_variables_initializer())  # important for mIoU

        ops = {
            "pointclouds_pl": pointclouds_pl,
            "labels_pl": labels_pl,
            "smpws_pl": smpws_pl,
            "is_training_pl": is_training_pl,
            "pred": pred,
            "loss": loss,
            "train_op": train_op,
            "merged": merged,
            "step": batch,
            "end_points": end_points,
            "update_iou": update_iou_op,
        }

        # Train for hyper_params["max_epoch"] epochs
        best_acc = 0
        for epoch in range(PARAMS["max_epoch"]):
            print("in epoch", epoch)
            print("max_epoch", PARAMS["max_epoch"])

            log_string("**** EPOCH %03d ****" % (epoch))
            sys.stdout.flush()

            # Train one epoch
            train_one_epoch(sess, ops, train_writer, stack_train)

            # Evaluate, save, and compute the accuracy
            if epoch % 5 == 0:
                acc = eval_one_epoch(sess, ops, validation_writer,
                                     stack_validation)

            if acc > best_acc:
                best_acc = acc
                save_path = saver.save(
                    sess,
                    os.path.join(PARAMS["logdir"],
                                 "best_model_epoch_%03d.ckpt" % (epoch)),
                )
                log_string("Model saved in file: %s" % save_path)
                print("Model saved in file: %s" % save_path)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(
                    sess, os.path.join(PARAMS["logdir"], "model.ckpt"))
                log_string("Model saved in file: %s" % save_path)
                print("Model saved in file: %s" % save_path)

        # Kill the process, close the file and exit
        stacker.terminate()
        LOG_FOUT.close()
        sys.exit()
Example #26
import tensorflow as tf
import input
import model

with tf.variable_scope("input"):
    filenames, labels = input.get_filenames_labels(12500, .90, True,
                                                   "../train_preprocessed2")
    x, y_ = input.input_pipeline(filenames, labels, 80)

with tf.variable_scope("model") as scope:
    y = model.model(x, True)

with tf.variable_scope("optimizer"):
    loss = model.get_loss(y, y_)
    optimizer = model.get_optimizer(loss)

with tf.variable_scope("error"):
    error = model.get_error(y, y_)

saver = tf.train.Saver()

with tf.variable_scope("summary"):
    logs_path = "../logs"
    merged_summary_op = model.get_summary_op(x, loss, error)

sv = tf.train.Supervisor(logdir="../logs",
                         init_op=tf.global_variables_initializer(),
                         summary_op=merged_summary_op,
                         saver=saver,
                         save_summaries_secs=60,
                         save_model_secs=600)
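The snippet ends after constructing the Supervisor; the training loop that would usually follow looks roughly like this sketch (step count and logging interval are arbitrary):

# Sketch: training loop driven by the Supervisor configured above.
with sv.managed_session() as sess:
    step = 0
    while not sv.should_stop():
        _, loss_val, err_val = sess.run([optimizer, loss, error])
        step += 1
        if step % 100 == 0:
            print('step %d  loss %.4f  error %.4f' % (step, loss_val, err_val))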
Example #27
def main():
    if len(sys.argv) != 7:
        print(
            "Usage: {0} <data directory> <hidden layer size> <min song length> <steps> <epochs> <batch_size>"
            .format(sys.argv[0]))
        exit(2)

    path = sys.argv[1]
    hidden_size = int(sys.argv[2])
    min_len = int(sys.argv[3])
    steps = int(sys.argv[4])
    epochs = int(sys.argv[5])
    batch_size = int(sys.argv[6])

    all_songs = get_songs(path)
    print('Preprocessed Songs')
    total_songs = len(all_songs)
    input_size = all_songs[0].shape[1]
    output_size = input_size
    rnn_units = hidden_size
    learning_rate = 0.001
    keep_probability = 0.6
    disp = 1
    print(total_songs, input_size)
    print(all_songs[0].shape)

    model_inputs, model_targets, keep_prob, lr = model_placeholders(
        input_size, output_size, steps)
    parameters = model_parameters(output_size, hidden_size)  #w1, b1
    final_outputs, prediction = rnn_layer(model_inputs, parameters, rnn_units,
                                          keep_prob, steps)
    loss = get_loss(final_outputs, model_targets)
    optimizer = get_optimizer(loss, lr)
    accuracy = get_accuracy(model_targets, prediction)

    init = tf.global_variables_initializer()
    session = tf.Session()

    print('Start Training')
    with session as sess:
        sess.run(init)
        for epoch in range(epochs):
            inputs, targets = generate_batches(all_songs, batch_size, steps,
                                               input_size, output_size)
            feed_dict = {
                model_inputs: inputs,
                model_targets: targets,
                keep_prob: keep_probability,
                lr: learning_rate
            }
            sess.run(optimizer, feed_dict=feed_dict)

            if epoch % disp == 0 or epoch == 10:
                l, a = sess.run([loss, accuracy], feed_dict=feed_dict)
                s = 'Epoch: {}, Loss: {:.4f}, Accuracy: {:.3f} \n'.format(
                    epoch, l, a)

                logger(epoch, epochs, s=s)

    # Generate new midi files
        get_random = False
        idx = 11 if get_random else np.random.randint(total_songs)
        song = all_songs[idx][:steps].tolist()

        print('Sampling new music')
        for i in range(100):

            initial = np.array([song[-steps:]])  # feed the last `steps` timesteps
            sample = sess.run(prediction, feed_dict={model_inputs: initial})
            new_songs = sample_music(sample, output_size, song)

        sample_midi(new_songs, name='gen_1')
        sample_midi(all_songs[idx], name='base_1')
Example #28
def train_neural_network():
    tf.reset_default_graph()

    with tf.Session() as sess:

        sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        # initialize lookup table
        table = initialize_lookup_table()

        train_feature_filenames, train_label_filenames = get_filenames()

        with tf.name_scope('raw_inputs'):
            features, raw_labels = input.getFiles(train_feature_filenames,
                                                  train_label_filenames)

        with tf.name_scope('processed_labels'):
            labels = preprocess_labels(raw_labels, table)

        output, test_output, test_features, test_labels = model.create_model(
            features, labels)

        with tf.name_scope('loss'):
            loss = model.get_loss(output, labels)

        with tf.name_scope('training_accuracy'):
            training_accuracy = model.compute_accuracy(output, labels)

        with tf.name_scope('dev_accuracy'):
            dev_accuracy = model.compute_accuracy(test_output, test_labels)

        train_step = model.get_optimizer(loss)
        training_fetches = [
            features, raw_labels, labels, output, loss, training_accuracy,
            train_step
        ]

        # initialize variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # add graph summary for tensorboard
        writer = tf.summary.FileWriter(constants.TENSORBOARD_DIR, sess.graph)

        # start queue runner for data loading
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # get dev features
        dev_features, dev_labels = sess.run([features, labels])
        # check if we received the labels correctly or not
        print(dev_labels)

        for epoch in range(1, constants.EPOCHS + 1):
            for batch in range(1, constants.NUM_BATCHES + 1):
                # train the model
                model_features, model_raw_labels, model_labels, model_output, model_loss, model_accuracy, _ = sess.run(
                    training_fetches)
                print "Epoch {}/{} ; Batch {}/{} ; Accuracy {} ; Loss {}".format(
                    epoch, constants.EPOCHS, batch, constants.NUM_BATCHES,
                    model_accuracy, model_loss)
                print model_output
                # evaluate the accuracy
                if (batch % constants.TEST_PERIOD == 0):
                    mdev_accuracy = sess.run(dev_accuracy,
                                             feed_dict={
                                                 test_features: dev_features,
                                                 test_labels: dev_labels
                                             })
Example #29
def main():
    
    #Training Data
    xtrain = 'Xtrain.txt'
    ytrain = 'Ytrain.txt'
    
    #Validation Data
    xtest = 'Xtest.txt'
    ytest = 'Ytest.txt'
    
    # Training Parameters
    batch_size = 500  # Batch size
    num_epochs = 5  # Number epochs
    train_holdout = 0.2  # Portion of training features used for validation
    learning_rate = 0.005  # Starting learning rate
    steps_per_epoch = 50 # Number of training steps per epoch
    
#----- Begin Main Code    
    
    # Get environment variables
    try:
        job_name = os.environ['JOB_NAME']
        task_index = os.environ['TASK_INDEX']
        ps_hosts = os.environ['PS_HOSTS']
        worker_hosts = os.environ['WORKER_HOSTS']
    except:
        job_name = None
        task_index = 0
        ps_hosts = None
        worker_hosts = None
        
    # Get local file paths
    PATH_TO_LOCAL_LOGS = os.path.expanduser(LOCAL_LOG_LOCATION)
    ROOT_PATH_TO_LOCAL_DATA = os.path.expanduser(LOCAL_DATASET_LOCATION)
   
    # Flags
    flags = tf.app.flags
    FLAGS = flags.FLAGS

    # Flags for environment variables
    flags.DEFINE_string("job_name", job_name,
                        "job name: worker or ps")
    flags.DEFINE_integer("task_index", task_index,
                         "Worker task index, should be >= 0. task_index=0 is "
                         "the chief worker task that performs the variable "
                         "initialization and checkpoint handling")
    flags.DEFINE_string("ps_hosts", ps_hosts,
                        "Comma-separated list of hostname:port pairs")
    flags.DEFINE_string("worker_hosts", worker_hosts,
                        "Comma-separated list of hostname:port pairs")
    
    # Training file flags
    flags.DEFINE_string("xtrain",
                        get_data_path(
                            dataset_name = "emanrao/variantnn-demo",
                            local_root = ROOT_PATH_TO_LOCAL_DATA,
                            local_repo = LOCAL_DATASET_NAME,
                            path = xtrain
                            ),
                        "Path to training dataset.")
    flags.DEFINE_string("ytrain",
                        get_data_path(
                            dataset_name = "emanrao/variantnn-demo",
                            local_root = ROOT_PATH_TO_LOCAL_DATA,
                            local_repo = LOCAL_DATASET_NAME,
                            path = ytrain
                            ),
                        "Path to training dataset.")
    
    flags.DEFINE_string("log_dir",
                         get_logs_path(root=PATH_TO_LOCAL_LOGS),
                         "Path to store logs and checkpoints.")
    
    # Validation file flags
    flags.DEFINE_string("xtest",
                        get_data_path(
                            dataset_name = "emanrao/variantnn-demo",
                            local_root = ROOT_PATH_TO_LOCAL_DATA,
                            local_repo = LOCAL_DATASET_NAME,
                            path = xtest
                            ),
                        "Path to testing dataset.")
    flags.DEFINE_string("ytest",
                        get_data_path(
                            dataset_name = "emanrao/variantnn-demo",
                            local_root = ROOT_PATH_TO_LOCAL_DATA,
                            local_repo = LOCAL_DATASET_NAME,
                            path = ytest
                            ),
                        "Path to testing dataset.")

    # Training parameter flags
    flags.DEFINE_integer("batch_size", batch_size,
                        "Batch size [100].")
    flags.DEFINE_integer("num_epochs", num_epochs,
                        "Number epochs [50].")
    flags.DEFINE_float("train_holdout", train_holdout,
                        "Portion of training features withheld from traing and used for validation [0.2].")
    flags.DEFINE_float("learning_rate", learning_rate,
                        "Starting learning rate [0.0005].")
    flags.DEFINE_integer("steps_per_epoch", steps_per_epoch, 
                         "Number of training steps per epoch")

    # Configure Distributed Environment
    def device_and_target():
        # If FLAGS.job_name is not set, we're running single-machine TensorFlow.
        # Don't set a device.
        if FLAGS.job_name is None:
            print("Running single-machine training")
            return (None, "")

        # Otherwise we're running distributed TensorFlow.
        print("Running distributed training")
        if FLAGS.task_index is None or FLAGS.task_index == "":
            raise ValueError("Must specify an explicit `task_index`")
        if FLAGS.ps_hosts is None or FLAGS.ps_hosts == "":
            raise ValueError("Must specify an explicit `ps_hosts`")
        if FLAGS.worker_hosts is None or FLAGS.worker_hosts == "":
            raise ValueError("Must specify an explicit `worker_hosts`")

        cluster_spec = tf.train.ClusterSpec({
                "ps": FLAGS.ps_hosts.split(","),
                "worker": FLAGS.worker_hosts.split(","),
        })
        server = tf.train.Server(
                cluster_spec, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
        if FLAGS.job_name == "ps":
            server.join()

        worker_device = "/job:worker/task:{}".format(FLAGS.task_index)
        # The device setter will automatically place Variables ops on separate
        # parameter servers (ps). The non-Variable ops will be placed on the workers.
        return (
                tf.train.replica_device_setter(
                        worker_device=worker_device,
                        cluster=cluster_spec),
                server.target,
        )

    device, target = device_and_target()

# ----- Read Data  -----   
    # Check Flags
    if FLAGS.log_dir is None or FLAGS.log_dir == "":
        raise ValueError("Must specify an explicit `log_dir`")
    if FLAGS.xtrain is None or FLAGS.xtrain == "":
        raise ValueError("Must specify an explicit `xtrain`")
    if FLAGS.ytrain is None or FLAGS.ytrain == "":
        raise ValueError("Must specify an explicit `ytrain`")
    if FLAGS.xtest is None or FLAGS.xtest == "":
        raise ValueError("Must specify an explicit `xtest`")
    if FLAGS.ytest is None or FLAGS.ytest == "":
        raise ValueError("Must specify an explicit `ytest`")
        
    print('Training dataset file: ', FLAGS.xtrain)
    print('Training target file: ', FLAGS.ytrain)

    print('Testing dataset file: ', FLAGS.xtest)
    print('Testing target file: ', FLAGS.ytest)
    
    print('Log Files Saved To: ', FLAGS.log_dir)

    # Read in data
    Xtrain, Ytrain = read_flat_file(FLAGS.xtrain, FLAGS.ytrain)           
    Xtest, Ytest = read_flat_file(FLAGS.xtest, FLAGS.ytest)  
    
    num_train = int(np.round(Xtrain.shape[0] * (1-FLAGS.train_holdout)))
    num_held = int(Xtrain.shape[0]-num_train)
    print('Training on {:d} features'.format(num_train))
    print('Validating on {:d} features (once per epoch)'.format(num_held)) 
    Xval = Xtrain[num_train:]
    Yval = Ytrain[num_train:]
    Xtrain = Xtrain[:num_train]
    Ytrain = Ytrain[:num_train]
    
    num_batches = int(np.floor(Ytrain.shape[0]/FLAGS.batch_size))
    if num_batches == 0:  # if the batch size exceeds the dataset size, fall back to a single batch
        num_batches = 1
        FLAGS.batch_size = Ytrain.shape[0]

# ----- Define Graph -----

    tf.reset_default_graph()
    with tf.device(device):            
#        X_in = tf.placeholder(tf.float32, [None, 15, 4, 3])
#        Y_out = tf.placeholder(tf.float32, [None, 8])
        global_step = tf.train.get_or_create_global_step()

        # Create Datasets
        train_dataset = tf.data.Dataset.from_tensor_slices((Xtrain, Ytrain))
#        train_dataset = train_dataset.shuffle(buffer_size=10000)
        train_dataset = train_dataset.batch(FLAGS.batch_size)
#        train_dataset = train_dataset.repeat(FLAGS.num_epochs)
        
        val_dataset = tf.data.Dataset.from_tensor_slices((Xval, Yval))
        val_dataset = val_dataset.batch(Yval.shape[0])
#        val_dataset = val_dataset.repeat(FLAGS.num_epochs)

        test_dataset = tf.data.Dataset.from_tensor_slices((Xtest, Ytest))
        test_dataset = test_dataset.batch(FLAGS.batch_size)

        # Create Iterator
        iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                                   train_dataset.output_shapes)
        features, labels = iterator.get_next()

        # Create initialisation operations
        train_init_op = iterator.make_initializer(train_dataset)
        val_init_op = iterator.make_initializer(val_dataset)
        test_init_op = iterator.make_initializer(test_dataset)
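        # Note: a single reinitializable iterator is shared by the train/val/test
        # datasets; running the matching *_init_op switches which dataset feeds
        # `features` and `labels` without rebuilding the graph.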
        
        # Apply model
        with tf.name_scope('predictions'):
            predictions = get_model(features, FLAGS)
        with tf.name_scope('loss'):    
            loss = get_loss(predictions,labels)
        tf.summary.scalar('loss', loss)  # add to TensorBoard
         
        with tf.name_scope('train'):
            train_step = (
                tf.train.AdamOptimizer(FLAGS.learning_rate)
                .minimize(loss, global_step=global_step)
                )
            
        summ = tf.summary.merge_all()
        writer = tf.summary.FileWriter(FLAGS.log_dir)
        
#%% Train Model with periodic validation
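# Note: each epoch below opens a fresh MonitoredTrainingSession that restores the
# latest checkpoint from FLAGS.log_dir, so the persisted global_step carries over
# between epochs and StopAtStepHook(last_step=steps_per_epoch*epoch_index) runs
# roughly steps_per_epoch new steps per call.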
    def run_train_epoch(target, FLAGS, epoch_index):
        print('Epoch {:d} Training...'.format(epoch_index))
        i=1
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.steps_per_epoch*epoch_index)] # Increment number of required training steps
        scaffold = tf.train.Scaffold(
                local_init_op=[train_init_op, val_init_op],
                saver=tf.train.Saver(max_to_keep=5)
                )
    
        with tf.train.MonitoredTrainingSession(
                master=target,
                is_chief=(FLAGS.task_index == 0),
                checkpoint_dir=FLAGS.log_dir,
                hooks = hooks,
                scaffold=scaffold
                ) as sess:
            
            writer.add_graph(sess.graph)
            sess.run(train_init_op) # switch to train dataset
            
            while not sess.should_stop():
                
                [current_loss,_,s] = sess.run([loss, train_step, summ])
                iteration = (epoch_index - 1) * FLAGS.steps_per_epoch + i
                print("Iteration {}  Training Loss: {:.4f}".format(iteration,current_loss))
                i += 1
                #writer.add_summary(s, i)
                if i == FLAGS.steps_per_epoch:  # validate on the last step of the epoch
                    sess.run(val_init_op) # switch to val dataset
                    while True:
                        try: # run and save validation parameters
                            v_loss = sess.run(loss)
                            print("Epoch {}  Validation Loss: {:.4f}".format(epoch_index, v_loss))
                        except tf.errors.OutOfRangeError:
                            break
                        
    for e in range(1, FLAGS.num_epochs + 1):
        run_train_epoch(target, FLAGS, e)
    
    # ----- Test Model on Different Dataset -----                  
    with tf.train.MonitoredTrainingSession(
            master=target,
            is_chief=(FLAGS.task_index == 0),
            checkpoint_dir=FLAGS.log_dir  # restore the trained weights instead of re-initialising
            ) as sess:
        sess.run(test_init_op)  # switch the iterator to the test dataset
        test_loss = sess.run(loss)  # evaluates a single test batch

    print("Test Set Loss (independent dataset): {:.4f}".format(test_loss))
Exemplo n.º 30
0
import numpy as np
import tensorflow as tf
from model import get_model,get_loss
import time

batch_size=1
num_point=4096
xyzrgb=9

pointclouds_pl=tf.constant(np.random.rand(batch_size,num_point,xyzrgb),dtype=tf.float32)
labels_pl=tf.constant(np.random.randint(0, 13, size=(batch_size, num_point)), dtype=tf.int32)  # random class ids (13 classes assumed)
is_training_pl = tf.constant(True, shape=())
bn_decay=None
pred = get_model(pointclouds_pl, is_training_pl, bn_decay=bn_decay)
loss = get_loss(pred, labels_pl)
init=tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
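    # Note: the first sess.run(pred) is typically much slower than the rest, since
    # it includes one-time graph and kernel setup; steady-state timing is better
    # judged from the later iterations.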
    for i in range(10):
        t1 = time.time()
        sess.run(pred)
        # sess.run(loss)
        t2=time.time()
        print(t2 - t1)