Example #1
 def test_Iterator(self, test_input, expected):
     test_iter = iterator.Iterator(test_input, 2, gpu=None)
     test_iter = iter(test_iter)
     result = next(test_iter)
     assert result == expected
     test_iter1 = iterator.Iterator(test_input, 2, gpu=None)
     test_iter2 = iterator.Iterator(test_input, 2, gpu=None)
     for t1, t2 in zip(test_iter1, test_iter2):
         assert t1 == t2
     test_iter = iterator.Iterator(test_input, 1, gpu=None)
     test_iter = iter(test_iter)
     result = next(test_iter)
     assert len(result) == 1
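These assertions only assume that iterator.Iterator wraps a sequence, takes a batch size and an optional gpu argument, and yields fixed-size batches in a deterministic order. A minimal sketch consistent with the test (the class body is an assumption inferred from the calls above, not the project's actual implementation):

class Iterator:
    """Minimal batching iterator: yields consecutive slices of the data."""
    def __init__(self, data, batch_size, gpu=None):
        self.data = list(data)
        self.batch_size = batch_size
        self.gpu = gpu  # unused in this sketch

    def __iter__(self):
        # Deterministic order, so two iterators over the same data yield equal batches.
        for i in range(0, len(self.data), self.batch_size):
            yield self.data[i:i + self.batch_size]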
Example #2
 def __init__(self):
     self.logger = logging.getLogger(__name__)
     self.logger.setLevel(getattr(logging, str(constants.LOG_LEVEL)))
     self.file_handler = RotatingFileHandler(
         constants.APP_LOG_FILENAME,
         maxBytes=constants.LOG_MAX_BYTES,
         backupCount=constants.LOG_BACKUP_COUNT)
     self.file_handler.setFormatter(logging.Formatter(constants.LOG_FORMAT))
     self.logger.addHandler(self.file_handler)
     self.current_ep = None
     self.current_db_tbl = None
     self.iterator = iterator.Iterator()
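This constructor only depends on a handful of settings from a project-local constants module. A plausible stub of the values it expects (the names are taken from the snippet above; the concrete values are assumptions):

# constants.py -- hypothetical values matching the attributes used above
LOG_LEVEL = 'INFO'                # resolved via getattr(logging, str(LOG_LEVEL))
APP_LOG_FILENAME = 'app.log'
LOG_MAX_BYTES = 5 * 1024 * 1024   # rotate after roughly 5 MB
LOG_BACKUP_COUNT = 3
LOG_FORMAT = '%(asctime)s %(levelname)s %(name)s: %(message)s'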
Example #3
File: train.py Project: Dutil/IFT6266
def load_dataset(batch_size=128, load_caption=False):

    train_iter = iterator.PreprocessIterator(batch_size=batch_size,
                                             extract_center=True,
                                             load_caption=load_caption)
    val_iter = iterator.Iterator(nb_sub=2000,
                                 batch_size=batch_size,
                                 img_path='val2014',
                                 extract_center=True,
                                 load_caption=load_caption)

    #val_iter.caption_dict = train_iter.caption_dict

    try:
        val_iter.vocab = train_iter.vocab
        val_iter.mapping = train_iter.mapping
        val_iter.process_captions()
    except Exception as e:
        print "The vocab passing didn't worked!"
        print e

    return train_iter, val_iter
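Example #9 below iterates the same project's Iterator as for xs, ys, cs in it when load_caption=True, so consuming the two iterators returned here presumably looks like the following (a hedged usage sketch, not code from the repository):

train_iter, val_iter = load_dataset(batch_size=128, load_caption=True)

for xs, ys, caps in train_iter:   # inputs, targets, captions per mini-batch
    pass                          # model update would go here
for xs, ys, caps in val_iter:
    pass                          # validation metrics would go here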
Example #4
 def print(self):
     print('\n【' + self.get_name() + '】' + self.get_description())
     menu_iterator = iterator.Iterator(self.menu_items)
     while menu_iterator.has_next():
         menu_iterator.next().print()
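This print method relies on an external iterator with has_next() and next() rather than Python's own iterator protocol. A minimal sketch of such a class, inferred from the call sites above (not the project's actual code):

class Iterator:
    """Classic external iterator over a list, in the has_next()/next() style."""
    def __init__(self, items):
        self._items = items
        self._pos = 0

    def has_next(self):
        return self._pos < len(self._items)

    def next(self):
        item = self._items[self._pos]
        self._pos += 1
        return item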
Example #5
File: train.py Project: machi-da/HRED
def main():
    args = parse_args()
    model_dir = args.model_dir
    """LOAD CONFIG FILE"""
    config_files = glob.glob(os.path.join(model_dir, '*.ini'))
    assert len(config_files) == 1, 'Put only one config file in the directory'
    config_file = config_files[0]
    config = configparser.ConfigParser()
    config.read(config_file)
    """LOGGER"""
    logger = getLogger(__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] %(message)s')

    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    sh.setFormatter(formatter)
    logger.addHandler(sh)

    log_file = model_dir + 'log.txt'
    fh = logging.FileHandler(log_file)
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    logger.info('[Training start] logging to {}'.format(log_file))
    """PARAMATER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    weight_decay = float(config['Parameter']['weight_decay'])
    gradclip = float(config['Parameter']['gradclip'])
    vocab_type = config['Parameter']['vocab_type']
    vocab_size = int(config['Parameter']['vocab_size'])
    """TRINING DETAIL"""
    gpu_id = args.gpu
    n_epoch = args.epoch
    batch_size = args.batch
    interval = args.interval
    """DATASET"""
    train_src_file = config['Dataset']['train_src_file']
    train_trg_file = config['Dataset']['train_trg_file']
    valid_src_file = config['Dataset']['valid_src_file']
    valid_trg_file = config['Dataset']['valid_trg_file']
    test_src_file  = config['Dataset']['test_src_file']
    correct_txt_file = config['Dataset']['correct_txt_file']

    train_data_size = dataset.data_size(train_trg_file)
    valid_data_size = dataset.data_size(valid_trg_file)
    logger.info('train size: {0}, valid size: {1}'.format(train_data_size, valid_data_size))

    if vocab_type == 'normal':
        init_vocab = {'<unk>': 0, '<s>': 1, '</s>': 2, '<eod>': 3}
        vocab = dataset.VocabNormal()
        vocab.make_vocab(train_src_file, train_trg_file, init_vocab, vocab_size, freq=0)
        dataset.save_pickle(model_dir + 'src_vocab.pkl', vocab.src_vocab)
        dataset.save_pickle(model_dir + 'trg_vocab.pkl', vocab.trg_vocab)
        sos = vocab.src_vocab['<s>']
        eos = vocab.src_vocab['</s>']
        eod = vocab.src_vocab['<eod>']

    elif vocab_type == 'subword':
        vocab = dataset.VocabSubword()
        if os.path.isfile(model_dir + 'src_vocab.sub.model') and os.path.isfile(model_dir + 'trg_vocab.sub.model'):
            vocab.load_vocab(model_dir + 'src_vocab.sub.model', model_dir + 'trg_vocab.sub.model')
        else:
            vocab.make_vocab(train_trg_file + '.sub', train_trg_file + '.sub', model_dir, vocab_size)
        sos = vocab.src_vocab.PieceToId('<s>')
        eos = vocab.src_vocab.PieceToId('</s>')
        eod = vocab.src_vocab.PieceToId('<eod>')

    src_vocab_size = len(vocab.src_vocab)
    trg_vocab_size = len(vocab.trg_vocab)
    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(src_vocab_size, trg_vocab_size))

    train_iter = iterator.Iterator(train_src_file, train_trg_file, batch_size, sort=True, shuffle=True)
    # train_iter = iterator.Iterator(train_src_file, train_trg_file, batch_size, sort=False, shuffle=False)
    valid_iter = iterator.Iterator(valid_src_file, valid_trg_file, batch_size, sort=False, shuffle=False)
    evaluater = Evaluate(correct_txt_file)
    test_iter = iterator.Iterator(test_src_file, test_src_file, batch_size, sort=False, shuffle=False)
    """MODEL"""
    model = HiSeq2SeqModel(
        WordEnc(src_vocab_size, embed_size, hidden_size, dropout_ratio),
        WordDec(trg_vocab_size, embed_size, hidden_size, dropout_ratio),
        SentEnc(hidden_size, dropout_ratio),
        SentDec(hidden_size, dropout_ratio),
        sos, eos, eod)
    """OPTIMIZER"""
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(gradclip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    """TRAIN"""
    sum_loss = 0
    loss_dic = {}
    for epoch in range(1, n_epoch + 1):
        for i, batch in enumerate(train_iter.generate(), start=1):
            batch = vocab.convert2label(batch)
            data = converter.convert(batch, gpu_id)
            loss = optimizer.target(*data)
            sum_loss += loss.data
            optimizer.target.cleargrads()
            loss.backward()
            optimizer.update()

            if i % interval == 0:
                logger.info('E{} ## iteration:{}, loss:{}'.format(epoch, i, sum_loss))
                sum_loss = 0
        chainer.serializers.save_npz(model_dir + 'model_epoch_{}.npz'.format(epoch), model)
        # chainer.serializers.save_npz(model_dir + 'optimizer_epoch{0}.npz'.format(epoch), optimizer)

        """EVALUATE"""
        valid_loss = 0
        for batch in valid_iter.generate():
            batch = vocab.convert2label(batch)
            data = converter.convert(batch, gpu_id)
            with chainer.no_backprop_mode(), chainer.using_config('train', False):
                valid_loss += optimizer.target(*data).data
        logger.info('E{} ## val loss:{}'.format(epoch, valid_loss))
        loss_dic[epoch] = valid_loss

        """TEST"""
        output = []
        for batch in test_iter.generate():
            # batch: tuple of (list of articles, list of abstracts_sos, list of abstracts_eos)
            batch = vocab.convert2label(batch)
            data = converter.convert(batch, gpu_id)
            """
            out: [(sent, attn), (sent, attn), ...] <-バッチサイズ
            sent: decodeされた文のリスト
            attn: 各文のdecode時のattentionのリスト
            """
            with chainer.no_backprop_mode(), chainer.using_config('train', False):
                out = model.generate(data[0], data[3])
            output.extend(out)

        res_decode = []
        res_attn = []
        for o in output:
            sent, attn = o
            sentence = dataset.to_list(sent)
            sentence = dataset.eod_truncate(sentence, eod)
            sent_num = len(sentence)
            sentence = [dataset.eos_truncate(s, eos) for s in sentence]
            sentence = [vocab.label2word(s) for s in sentence]
            sentence = dataset.join_sentences(sentence)
            res_decode.append(sentence)
            attn = np.sum(np.array(attn[:sent_num]), axis=0) / sent_num
            res_attn.append(attn)

        rank_list = evaluater.rank(res_attn)
        single = evaluater.single(rank_list)
        multiple = evaluater.multiple(rank_list)
        logger.info('E{} ## precision'.format(epoch))
        logger.info('single: {} | {}'.format(single[0], single[1]))
        logger.info('multi : {} | {}'.format(multiple[0], multiple[1]))

        with open(model_dir + 'model_epoch_{}.hypo'.format(epoch), 'w') as f:
            [f.write(r + '\n') for r in res_decode]
        with open(model_dir + 'model_epoch_{}.attn'.format(epoch), 'w') as f:
            [f.write('{}\n'.format(r)) for r in res_attn]
        with open(model_dir + 'model_epoch_{}.prec'.format(epoch), 'w') as f:
            f.write('single\n')
            f.write(single[0] + '\n')
            f.write(single[1] + '\n')
            f.write('multiple\n')
            f.write(multiple[0] + '\n')
            f.write(multiple[1] + '\n')

    """MODEL SAVE"""
    best_epoch = min(loss_dic, key=(lambda x: loss_dic[x]))
    logger.info('best_epoch:{0}'.format(best_epoch))
    chainer.serializers.save_npz(model_dir + 'best_model.npz', model)
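The training script only assumes that iterator.Iterator(src_file, trg_file, batch_size, sort=..., shuffle=...) exposes a generate() method yielding raw mini-batches of (source, target) line pairs. A rough sketch of such a generator, with the file format and batching details assumed rather than taken from the HRED code:

import random

class Iterator:
    def __init__(self, src_file, trg_file, batch_size, sort=False, shuffle=False):
        with open(src_file) as fs, open(trg_file) as ft:
            self.pairs = list(zip(fs, ft))
        if sort:  # sorting by source length keeps padding per batch small
            self.pairs.sort(key=lambda p: len(p[0].split()))
        self.batch_size = batch_size
        self.shuffle = shuffle

    def generate(self):
        batches = [self.pairs[i:i + self.batch_size]
                   for i in range(0, len(self.pairs), self.batch_size)]
        if self.shuffle:  # shuffle whole batches so length-sorted batches stay intact
            random.shuffle(batches)
        for batch in batches:
            yield batch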
Example #6
        v = (music[pos+i] + 1.0) / 2.0
        idx = int(v * (R-1))
        x[idx][0][i] = 1.0
    """
    answer = xp.zeros((N_OUTPUT), dtype='i')
    for i in range(N_OUTPUT):
        v = (music[pos + L + i] + 1.0) / 2.0
        idx = int(v * (R - 1))
        answer[i] = idx
        hoge.append(idx)
    data.append(x)
    answers.append(answer)
    pos += N_OUTPUT

print "NUM DATA:", len(data)
trainIter = iterator.Iterator(10, data, answers, True)
optimizer = chainer.optimizers.Adam(alpha=0.001)
optimizer.setup(model)

for i in range(10):
    print "EPOCH :", i
    while True:
        model.cleargrads()
        x, t = trainIter.Next()
        loss = model(x, t)
        loss.backward()
        optimizer.update()
        if trainIter.newEpoch:
            break

d = data[0]
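Here the iterator exposes a Next() method and a newEpoch flag that flips once a full pass over the data has been served. A minimal sketch consistent with the loop above (constructor arguments and behaviour are inferred from the call sites, not the original source):

import numpy as np

class Iterator:
    def __init__(self, batch_size, data, answers, shuffle=True):
        self.batch_size = batch_size
        self.data = data
        self.answers = answers
        self.shuffle = shuffle
        self.order = np.random.permutation(len(data)) if shuffle else np.arange(len(data))
        self.pos = 0
        self.newEpoch = False

    def Next(self):
        idx = self.order[self.pos:self.pos + self.batch_size]
        x = [self.data[i] for i in idx]
        t = [self.answers[i] for i in idx]
        self.pos += self.batch_size
        self.newEpoch = self.pos >= len(self.data)
        if self.newEpoch:  # wrap around and reshuffle for the next epoch
            self.pos = 0
            if self.shuffle:
                self.order = np.random.permutation(len(self.data))
        return x, t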
Example #7
 def __reversed__(self):
     last = self._trailer._previous
     return iterator.Iterator(None, last)
Example #8
 def __iter__(self):
     first = self._header._next
     return iterator.Iterator(first, None)
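Examples #7 and #8 both hand Iterator a starting node from a doubly linked list with header/trailer sentinels, so the iterator presumably walks _next pointers forward or _previous pointers backward until it reaches a sentinel. A hedged sketch of that idea (the node attributes _next and _previous come from the snippets above; _element and the class body are assumptions):

class Iterator:
    """Walks a doubly linked list forward from first or backward from last."""
    def __init__(self, first, last):
        self._forward = first is not None
        self._node = first if self._forward else last

    def __iter__(self):
        return self

    def __next__(self):
        node = self._node
        # Header/trailer sentinels carry no element, so a missing element ends iteration.
        if node is None or getattr(node, '_element', None) is None:
            raise StopIteration
        self._node = node._next if self._forward else node._previous
        return node._element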
Example #9
import iterator
import numpy as np
import os
import pickle as pkl

if __name__ == "__main__":

    data_path = "inpainting/preprocess/"

    train_it = iterator.Iterator(batch_size=10000, load_caption=True)

    no_img = 0
    all_caps = {}

    for i, [xs, ys, cs] in enumerate(train_it):

        print "Doing file no {}...".format(i)

        file_name_x = os.path.join(data_path, "data_train_x_{}.npy".format(i))
        file_name_y = os.path.join(data_path, "data_train_y_{}.npy".format(i))

        np.save(file_name_x, xs)
        np.save(file_name_y, ys)

        for img_caps in cs:

            all_caps[no_img] = img_caps
            no_img += 1

    print(all_caps[0])
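The loop above writes each chunk to data_train_x_{i}.npy / data_train_y_{i}.npy and only keeps the captions in memory, which is probably why pickle is imported but never used. Reading a chunk back, and persisting the captions as the import suggests, could look like this (the all_caps.pkl file name is an assumption):

xs = np.load(os.path.join(data_path, "data_train_x_0.npy"))
ys = np.load(os.path.join(data_path, "data_train_y_0.npy"))

with open(os.path.join(data_path, "all_caps.pkl"), "wb") as f:
    pkl.dump(all_caps, f)   # caption dict keyed by running image index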
Example #10
File: test.py Project: machi-da/HRED
def main():
    args = parse_args()
    model_dir = args.model_dir
    """LOAD CONFIG FILE"""
    config_files = glob.glob(os.path.join(model_dir, '*.ini'))
    assert len(config_files) == 1, 'Put only one config file in the directory'
    config_file = config_files[0]
    config = configparser.ConfigParser()
    config.read(config_file)
    """LOGGER"""
    logger = getLogger(__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] %(message)s')

    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    sh.setFormatter(formatter)
    logger.addHandler(sh)

    log_file = model_dir + 'log.txt'
    fh = logging.FileHandler(log_file)
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    logger.info('[Test start] logging to {}'.format(log_file))
    """PARAMATER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    vocab_type = config['Parameter']['vocab_type']
    """TEST DETAIL"""
    gpu_id = args.gpu
    batch_size = args.batch
    model_file = args.model
    if gpu_id >= 0:
        xp = chainer.cuda.cupy
    else:
        xp = np
    """DATASET"""
    test_src_file = config['Dataset']['test_src_file']
    correct_txt_file = config['Dataset']['correct_txt_file']

    test_data_size = dataset.data_size(test_src_file)
    logger.info('test size: {0}'.format(test_data_size))
    if vocab_type == 'normal':
        vocab = dataset.VocabNormal()
        vocab.load_vocab(model_dir + 'src_vocab.normal.pkl',
                         model_dir + 'trg_vocab.normal.pkl')
        vocab.set_reverse_vocab()
        sos = vocab.src_vocab['<s>']
        eos = vocab.src_vocab['</s>']
        eod = vocab.src_vocab['<eod>']

    elif vocab_type == 'subword':
        vocab = dataset.VocabSubword()
        vocab.load_vocab(model_dir + 'src_vocab.sub.model',
                         model_dir + 'trg_vocab.sub.model')
        sos = vocab.src_vocab.PieceToId('<s>')
        eos = vocab.src_vocab.PieceToId('</s>')
        eod = vocab.src_vocab.PieceToId('<eod>')

    src_vocab_size = len(vocab.src_vocab)
    trg_vocab_size = len(vocab.trg_vocab)
    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(
        src_vocab_size, trg_vocab_size))

    evaluater = Evaluate(correct_txt_file)
    test_iter = iterator.Iterator(test_src_file,
                                  test_src_file,
                                  batch_size,
                                  sort=False,
                                  shuffle=False)
    """MODEL"""
    model = HiSeq2SeqModel(
        WordEnc(src_vocab_size, embed_size, hidden_size, dropout_ratio),
        WordDec(trg_vocab_size, embed_size, hidden_size, dropout_ratio),
        SentEnc(hidden_size, dropout_ratio),
        SentDec(hidden_size, dropout_ratio), sos, eos, eod)
    chainer.serializers.load_npz(model_file, model)
    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    """TEST"""
    output = []
    for batch in test_iter.generate():
        # batch: tuple of (list of articles, list of abstracts_sos, list of abstracts_eos)
        batch = vocab.convert2label(batch)
        data = converter.convert(batch, gpu_id)
        """
        out: [(sent, attn), (sent, attn), ...] <-バッチサイズ
        sent: decodeされた文のリスト
        attn: 各文のdecode時のattentionのリスト
        """
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            out = model.generate(data[0], data[3])
        output.extend(out)

    res_decode = []
    res_attn = []
    for o in output:
        sent, attn = o
        sentence = dataset.to_list(sent)
        sentence = dataset.eod_truncate(sentence, eod)
        sent_num = len(sentence)
        sentence = [dataset.eos_truncate(s, eos) for s in sentence]
        sentence = [vocab.label2word(s) for s in sentence]
        sentence = dataset.join_sentences(sentence)
        res_decode.append(sentence)
        attn = xp.sum(xp.array(attn[:sent_num]), axis=0) / sent_num
        res_attn.append(attn)

    rank_list = evaluater.rank(res_attn)
    single = evaluater.single(rank_list)
    multiple = evaluater.multiple(rank_list)
    logger.info('single: {} | {}'.format(single[0], single[1]))
    logger.info('multi : {} | {}'.format(multiple[0], multiple[1]))

    with open(model_file + '.hypo_t', 'w') as f:
        [f.write(r + '\n') for r in res_decode]
    with open(model_file + '.attn_t', 'w') as f:
        [f.write('{}\n'.format(r)) for r in res_attn]
Example #11
import matplotlib.pyplot as plt
import numpy as np
import iterator as it
import plotter as pt
import segment as sg
import transformation as tr

transformations = tr.Transformation.getCantorSet()
for t in transformations:
    t.printTransformation()
segments = [sg.Segment(sg.Segment(None, np.array([0, 0])), np.array([0, 1]))]
for s in segments:
    s.printSegment()
itera = it.Iterator(transformations, segments, 5)
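Iterator here is built from a list of transformations, a seed segment, and an iteration depth, which matches the usual iterated-function-system construction of the Cantor set: at every step each segment is replaced by its images under all transformations. A self-contained sketch of that idea on plain numpy endpoints, independent of the transformation/segment/iterator modules above (whose internals are not shown):

import numpy as np

def cantor_step(segments, scale=1.0 / 3.0):
    """Replace each segment (a, b) by its two outer thirds."""
    out = []
    for a, b in segments:
        d = (b - a) * scale
        out.append((a, a + d))    # first third
        out.append((b - d, b))    # last third
    return out

segments = [(np.array([0.0, 0.0]), np.array([0.0, 1.0]))]  # same seed segment as above
for _ in range(5):                                         # same depth as it.Iterator(..., 5)
    segments = cantor_step(segments)
print(len(segments))                                       # 2**5 = 32 segments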