Example no. 1
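 # Model factory: instantiate the classifier selected by `basename` and move it onto cuda(gpu) when gpu >= 0.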
 def __init__(self, gpu, basename, input_size=32, Resampling=500):
     if basename == 'cnnLstm':
         if gpu >= 0:
             self.model = clstm(gpu, input_size, Resampling).cuda(gpu)
         else:
             self.model = clstm(gpu, input_size, Resampling)
     elif basename == 'cnnVoting':  # use the CNN Voting approach
         if gpu >= 0:
             self.model = cnnVoting(gpu, input_size, Resampling).cuda(gpu)
         else:
             self.model = cnnVoting(gpu, input_size, Resampling)
     elif basename == 'cnnTransformer':
         if gpu >= 0:
             self.model = cnnTransformer(gpu, input_size,
                                         Resampling).cuda(gpu)
         else:
             self.model = cnnTransformer(gpu, input_size, Resampling)
     elif basename == 'cnnSvm':
         if gpu >= 0:
             self.model = cnnSvm(gpu, input_size, Resampling).cuda(gpu)
         else:
             self.model = cnnSvm(gpu, input_size, Resampling)
     elif basename == 'vdCnn':
         if gpu >= 0:
             self.model = VDCNN(gpu, input_size, Resampling).cuda(gpu)
         else:
             self.model = VDCNN(gpu, input_size, Resampling)
     elif basename == 'dpCnn':
         if gpu >= 0:
             self.model = DPCNN(gpu, input_size, Resampling).cuda(gpu)
         else:
             self.model = DPCNN(gpu, input_size, Resampling)
     else:
         raise ValueError('unknown model basename: {}'.format(basename))
Example no. 2
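    # Prediction wrapper: builds the character vocabulary from config.ALPHABET, loads the index-to-label mapping, and constructs a VDCNN graph on the requested device.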
    def __init__(self,
                 model_weights_dir,
                 num_channel=1,
                 device="gpu",
                 device_id=0,
                 variable_reuse=None,
                 is_chinese=False):
        """Set up the vocabulary, label map, and VDCNN graph for prediction.

        :param model_weights_dir: string, directory of the saved model
        :param num_channel: int, number of channels of the input
        :param device: string, "cpu" or "gpu"
        :param device_id: int, cpu or gpu device id
        :param variable_reuse: bool, whether to reuse variables during prediction (for multiple gpus, see the examples below)
        :param is_chinese: bool, whether the model input is Chinese
        """
        self.model_weights_dir = model_weights_dir
        self.per_process_gpu_memory_fraction = .95
        self.num_channel = num_channel
        self.device = device
        self.device_id = device_id
        self.variable_reuse = variable_reuse

        self.is_chinese = is_chinese
        # load vocab

        self.vocabulary = learn.preprocessing.CategoricalVocabulary()
        for token in config.ALPHABET:
            self.vocabulary.add(token)
        self.vocabulary.freeze()
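        # the slice below drops the last 11 characters of the weights path (presumably a trailing subfolder name) so index2label.pk is read from the parent directory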
        self.index2label = pickle.load(
            open(os.path.join(self.model_weights_dir[:-11], 'index2label.pk'),
                 'rb'))
        self.num_class = len(self.index2label)

        max_document_length = config.FEATURE_LEN
        self.vocab_processor = learn.preprocessing.VocabularyProcessor(
            max_document_length, vocabulary=self.vocabulary, tokenizer_fn=list)
        self.is_training = tf.placeholder('bool', [], name='is_training')
        # load model
        with tf.device(self.device + ":" + str(self.device_id)):
            self.model = VDCNN(
                feature_len=config.FEATURE_LEN,
                num_classes=self.num_class,
                vocab_size=70,  # fixed to 70, <unk> + 69 char in config
                embedding_size=config.CHAR_EBD_SIZE,
                is_training=self.is_training,
                depth=9)

        # build the session used at prediction time; the instance attribute then shadows the model_session() method
        self.model_session = self.model_session()
Example no. 3
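# Inference entry point: restore a VDCNN checkpoint from args.model_path and run it over the .txt files under data/test via ReadARKFile.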
def main(_):
    depth = 9
    use_he_uniform = True
    optional_shortcut = False
    currentPath = os.path.dirname(os.path.abspath(__file__))
    save_path = os.path.join(currentPath, args.model_path)

    if not os.path.exists(save_path):
        raise ValueError("{} not exists!!".format(save_path))
    ckpt = tf.train.get_checkpoint_state(save_path)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_name = args.model_name
    else:
        raise ValueError("no checkpoint found in {}".format(save_path))

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True

    sess = tf.Session(config=config)
    cnn_model = VDCNN(
        input_dim=[context_window_size, feat_size],
        batchsize=batchsize,
        depth=9,
        downsampling_type=args.downsampling_type,
        use_he_uniform=use_he_uniform,
        optional_shortcut=optional_shortcut)
    saver = tf.train.Saver()

    saver.restore(sess, os.path.join(save_path, ckpt_name))
    print("[*] Read {}".format(ckpt_name))

    test_path = os.path.join(currentPath, os.path.join("data", "test"))
    test_list = [
        os.path.join(test_path, file) for file in os.listdir(test_path)
        if file.endswith(".txt")
    ]
    ReadARKFile(sess, test_list, cnn_model)
Example no. 4
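# Training setup: build the VDCNN text classifier, its train op, and a timestamped output directory for models and summaries.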
# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    is_training = tf.placeholder('bool', [], name='is_training')

    with sess.as_default():
        vdcnn = VDCNN(feature_len=FLAGS.feature_len,
                      num_classes=num_classes,
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=FLAGS.embedding_dim,
                      l2_reg_lambda=FLAGS.l2_reg_lambda,
                      is_training=is_training,
                      depth=9)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_ops = vdcnn.build_train_op(FLAGS.lr, global_step)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(
            os.path.join(os.path.curdir, FLAGS.TRAIN_DIR, timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
Example no. 5
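# PyTorch training setup: wrap the train/test datasets in DataLoaders, build VDCNN (with DataParallel on CUDA), and configure SGD or Adam with a StepLR schedule.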
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=15)

test_dataset = TextDataset(test_x_path.get(dataset), test_y_path.get(dataset))
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=15)
end = time.time()
print(end - start)
print('Dataset loaded...')

## Model initialization
model = VDCNN(vocab_size, embed_size, depth, downsample, args.shortcut,
              kmaxpool, num_classes)

if is_cuda:
    model.cuda()
    model = nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
if args.load is not None:
    model.load_state_dict(torch.load(args.load))

## Optimizer
if optimi == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
else:
    optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.7)
criterion = nn.CrossEntropyLoss()
Example no. 6
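# Chainer training script: vectorize the AG corpus at the character level, train VDCNN with MomentumSGD, and report train and test loss each epoch.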
def train(epoch=10, batch_size=128, embedding_size=16, class_n=10, maxlen=1014, gpu=None):

    test_ratio = .2

    # fake dataset
    # vocab_n = 100
    # X = np.random.randint(vocab_n, size=(1000, 1, maxlen)).astype(np.int32)
    # T = np.random.randint(10, size=(1000)).astype(np.int32)
    # train_x, test_x = X[:int(len(X)*(1-test_ratio))], X[-int(len(X)*test_ratio):]
    # train_t, test_t = T[:int(len(T)*(1-test_ratio))], T[-int(len(T)*test_ratio):]

    vocab_n = len(token_dict)
    ag = AGCorpus('./datas/newsspace200.xml')
    T, X = ag.get_data()
    N = len(X)
    X = util.np_int32([ util.convert_one_of_m_vector_char(x, token_dict, maxlen).astype(np.int32).reshape(1, maxlen) for x in X ])
    T = util.np_int32(T)
    train_x, test_x = X[:int(len(X)*(1-test_ratio))], X[-int(len(X)*test_ratio):]
    train_t, test_t = T[:int(len(T)*(1-test_ratio))], T[-int(len(T)*test_ratio):]

    train_n = len(train_x)
    test_n = len(test_x)

    model = VDCNN(vocab_n, embedding_size, class_n)

    if gpu is not None:  # gpu id 0 is a valid device
        chainer.cuda.get_device(gpu).use()
        model.to_gpu()
        xp = chainer.cuda.cupy
    else:
        xp = np

    optimizer = optimizers.MomentumSGD()
    optimizer.setup(model)

    s.s_print('epoch: {}'.format(epoch))
    s.s_print('batch size: {}'.format(batch_size))
    s.s_print('embedding size: {}'.format(embedding_size))
    s.s_print('class n: {}'.format(class_n))
    s.s_print('vocab n: {}'.format(vocab_n))
    s.s_print('train n: {}'.format(train_n))
    s.s_print('test n: {}'.format(test_n))

    for e in range(epoch):
        loss_acc = 0
        order = np.random.permutation(train_n)
        train_iter_x = Iterator(train_x, batch_size, order=order)
        train_iter_t = Iterator(train_t, batch_size, order=order)
        for x, t in tqdm(zip(train_iter_x, train_iter_t)):
            x = model.prepare_input(x, dtype=xp.int32, xp=xp)
            t = model.prepare_input(t, dtype=xp.int32, xp=xp)
            loss = model(x, t)
            model.cleargrads()  # reset accumulated gradients before the backward pass
            loss.backward()
            optimizer.update()
            loss_acc += float(loss.data)
        print('loss: {}'.format(loss_acc/train_n/batch_size))
        loss_acc = 0  # reset so the test loss is not mixed with the training loss
        order = np.random.permutation(test_n)
        test_iter_x = Iterator(test_x, batch_size, order=order)
        test_iter_t = Iterator(test_t, batch_size, order=order)
        for x, t in tqdm(zip(test_iter_x, test_iter_t)):
            x = model.prepare_input(x, dtype=xp.int32, xp=xp)
            t = model.prepare_input(t, dtype=xp.int32, xp=xp)
            loss = model(x, t)
            loss_acc += float(loss.data)
        print('test loss: {}'.format(loss_acc/test_n/batch_size))
Example no. 7
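# Denoising training loop: count the TFRecord examples, build ANFCN or VDCNN over (noisy, clean) feature slices, and train with a gradient-clipped MomentumOptimizer while writing summaries and checkpoints.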
def main(_):
    # Set some Top params
    # batchsize = 32
    # context_window_size = 11
    # feat_size = 43
    # Change the 2 params to global params
    depth = 9
    use_he_uniform = True
    optional_shortcut = False
    learning_rate = 1e-3
    # num_epochs = 3
    currentPath = os.path.dirname(os.path.abspath(__file__))

    saver_path = os.path.join(currentPath, "model_save")
    if not os.path.exists(saver_path):
        os.mkdir(saver_path)  # create save dir

    TFRecord = os.path.join(currentPath, os.path.join("data", args.TFRecord))
    num_example = 0
    for record in tf.python_io.tf_record_iterator(TFRecord):
        num_example += 1
    print("total examples in TFRecords {}: {}".format(TFRecord, num_example))
    num_batchs = num_example / batchsize
    num_iters = int(num_batchs * num_epochs) + 1

    sliced_feat_op, sliced_noise_feat_op = read_and_decode(
        TFRecord, context_window_size, feat_size)

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    udevice = []
    for device in devices:
        if len(devices) > 1 and device.device_type == "GPU":
            continue
        udevice.append(device)
    sess = tf.Session(config=config)
    if model_type == "ANFCN":
        cnn_model = ANFCN(input_dim=[context_window_size, feat_size],
                          batchsize=batchsize,
                          is_ref=True,
                          do_prelu=True)
    elif model_type == "VDCNN":
        cnn_model = VDCNN(input_dim=[context_window_size, feat_size],
                          batchsize=batchsize,
                          depth=9,
                          downsampling_type=args.downsampling_type,
                          use_he_uniform=use_he_uniform,
                          optional_shortcut=optional_shortcut)
    else:
        print("Model type error!!")
        sys.exit(1)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        # TODO: change the decay-step strategy to use num_epochs * num_batches_per_epoch
        learning_rate = tf.train.exponential_decay(learning_rate,
                                                   global_step,
                                                   num_epochs,
                                                   0.95,
                                                   staircase=True)
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        gradients, variables = zip(
            *optimizer.compute_gradients(cnn_model.loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 7.0)
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             global_step=global_step)
    print("Initializing all variables.")
    sess.run(tf.global_variables_initializer())

    if not os.path.exists(os.path.join(saver_path, "train")):
        os.mkdir(os.path.join(saver_path, "train"))
    tf.summary.scalar("loss", cnn_model.loss)
    merge_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(os.path.join(saver_path, "train"),
                                   sess.graph)
    saver = tf.train.Saver()  # local model saver

    num_iters = 101
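    # NOTE: this fixed iteration count overrides the num_iters value computed from the TFRecord size above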
    with sess:
        for i in range(num_iters):
            sliced_feat, sliced_noise_feat = sess.run(
                [sliced_feat_op, sliced_noise_feat_op])
            feed = {
                cnn_model.input_x: sliced_noise_feat,
                cnn_model.input_y: sliced_feat,
                cnn_model.is_training: True
            }
            _, step, loss = sess.run([train_op, global_step, cnn_model.loss],
                                     feed)
            train_summary = sess.run(merge_summary,
                                     feed_dict={
                                         cnn_model.input_x: sliced_noise_feat,
                                         cnn_model.input_y: sliced_feat,
                                         cnn_model.is_training: True
                                     })
            print("step {}/{}, loss {:g}".format(step, num_iters, loss))
            if i % save_freq == 0 or i == (num_iters - 1):
                saver.save(sess,
                           os.path.join(saver_path, "saver"),
                           global_step=i)
                writer.add_summary(train_summary, step)
Example no. 8
n_dev_samples = 200000
# TODO: Create a cross validation procedure
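# Hold out the last n_dev_samples examples as the dev split; everything before them is used for training.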
x_train, x_dev = x_shuffled[:-n_dev_samples], x_shuffled[-n_dev_samples:]
y_train, y_dev = y_shuffled[:-n_dev_samples], y_shuffled[-n_dev_samples:]
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = VDCNN()

        # Define Training procedure
        ### To keep the batch-norm moving_mean & moving_var updated, the update ops must run together with the loss-minimization step
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Ensures that the update_ops execute before the train step
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars: