Example No. 1
import itertools
import os
import sys

# dataLoader, checker and generate_flav_c are project-specific helpers
# assumed to be importable/defined alongside this script.


def main():
    if len(sys.argv) < 3:
        print('Need input/output filenames as first/second arguments. Exit.')
        sys.exit(-1)
    if os.path.exists(sys.argv[2]):
        print('Output file exists. Exit.')
        sys.exit(-1)

    print('\nChecking input file consistency...')
    loaders = dataLoader.get_data(sys.argv[1])
    # Run the consistency checks (op and sys enabled, flavor disabled).
    checks = checker.run_check_data(loaders, True, True, False)
    for data in loaders:
        typ = data.meas_type
        if 1 in data.flavs:
            print('FLAV_C already present in input file for %s. Exit.' % typ)
            sys.exit(-1)
    if not any(0 in data.flavs for data in loaders):
        print('FLAV_B not found in input file. Exit.')
        sys.exit(-1)

    print('\nGenerating new csv content...')
    new_csv_data = list(
        itertools.chain.from_iterable(l for d in loaders
                                      for l in generate_flav_c(d)))

    # Copy the original rows and append the generated FLAV_C rows.
    with open(sys.argv[1]) as f:
        old_csv_data = f.readlines()

    with open(sys.argv[2], 'w') as f:
        f.writelines(old_csv_data)
        f.write('\n')
        f.writelines(new_csv_data)

    print('Done.')
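A minimal sketch of how this entry point might be wired up and run, assuming the snippet lives in a script named add_flav_c.py (a hypothetical name); the CSV filenames are placeholders:

if __name__ == '__main__':
    main()

# Shell invocation (placeholder filenames):
#   python add_flav_c.py measurements.csv measurements_with_flav_c.csv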
Example No. 2
import sys

import torch as t

# opt (config object), get_data, PoetryModel, generate and gen_acrostic are
# assumed to come from the surrounding project.


def gen(**kwargs):
    """
    Command-line entry point: generates a poem from the given options.
    """
    # Override the configuration with the supplied keyword arguments.
    for k, v in kwargs.items():
        setattr(opt, k, v)

    # Load the data and the model.
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    map_location = lambda s, l: s  # keep loaded tensors on the CPU
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model.cuda()

    # String handling compatible with both Python 2 and Python 3.
    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            start_words = opt.start_words.encode(
                'ascii', 'surrogateescape').decode('utf8')
            prefix_words = opt.prefix_words.encode(
                'ascii',
                'surrogateescape').decode('utf8') if opt.prefix_words else None
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = opt.prefix_words.decode(
            'utf8') if opt.prefix_words else None

    # Encoding detail: convert half-width punctuation to full-width, since
    # classical poems use full-width symbols only.
    start_words = start_words.replace(',', u',')\
        .replace('.', u'。')\
        .replace('?', u'?')

    # Choose between an acrostic poem and an ordinary one.
    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
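A minimal usage sketch; the keyword arguments mirror the opt attributes read above, and the checkpoint path and starting words are placeholders:

# Generate an ordinary (non-acrostic) poem starting with the given characters.
gen(model_path='checkpoints/tang.pth',  # placeholder checkpoint path
    start_words='春江花月夜',
    prefix_words=None,
    use_gpu=False,
    acrostic=False)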
Example No. 3
# Same imports and project helpers (dataLoader, checker, generate_flav_c)
# as in Example No. 1.
def main():
    if len(sys.argv) < 3:
        print('Need input/output filenames as first/second arguments. Exit.')
        sys.exit(-1)
    if os.path.exists(sys.argv[2]):
        print('Output file exists. Exit.')
        sys.exit(-1)

    print('\nChecking input file consistency...')
    loaders = dataLoader.get_data(sys.argv[1])
    checks = checker.run_check_data(loaders, True, True, False)
    # Pair each check result with its loader and bail out on the first problem.
    for res, data in zip(checks, loaders):
        typ = data.meas_type
        if not res:
            print('Checks on input file failed for %s. Exit.' % typ)
            sys.exit(-1)
        if 0 not in data.flavs:
            print('FLAV_B not found in input file for %s. Exit.' % typ)
            sys.exit(-1)
        if 1 in data.flavs:
            print('FLAV_C already present in input file for %s. Exit.' % typ)
            sys.exit(-1)

    print('\nGenerating new csv content...')
    new_csv_data = list(itertools.chain.from_iterable(
        l
        for d in loaders
        for l in generate_flav_c(d)
    ))

    with open(sys.argv[1]) as f:
        old_csv_data = f.readlines()

    with open(sys.argv[2], 'w') as f:
        f.writelines(old_csv_data)
        f.write('\n')
        f.writelines(new_csv_data)

    print('Done.')
def run_check(filename, op=True, sys=True, flavor=True):
    loaders = dataLoader.get_data(filename)
    return run_check_data(loaders, op, sys, flavor)
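A short usage sketch for the helper above, assuming run_check_data returns one result per loader (as the per-loader loop in Example No. 3 suggests); the filename is a placeholder:

results = run_check('measurements.csv', op=True, sys=True, flavor=False)
if not all(results):
    print('Consistency checks failed.')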
Example No. 6
# Assumes the usual project-level imports: torch as t, torch.nn as nn,
# Variable from torch.autograd, tqdm, meter from torchnet, plus the
# project's own Visualizer, PoetryModel, get_data, generate and opt.
def train(**kwargs):
    # Override the configuration with the supplied keyword arguments.
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Load the data.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Define the model, optimizer and loss.
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))

    if opt.use_gpu:
        model.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm(enumerate(dataloader)):
            # Training step.
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()

            # Input and target are shifted by one position.
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            # target = target.cuda()
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.data[0])  # loss.item() on newer PyTorch

            # Visualization.
            if (ii + 1) % opt.plot_every == 0:

                # if os.path.exists(opt.debug_file):
                #     ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])

                # Original poems from the current batch.
                poetrys = [[ix2word[_word] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('<br>'.join(
                    [''.join(poetry) for poetry in poetrys]),
                         win='origin_poem')

                gen_poetries = []
                # Generate eight poems, one starting with each of these characters.
                for word in list('春江花月夜凉如水'):
                    gen_poetry = ''.join(
                        generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join(
                    [''.join(poetry) for poetry in gen_poetries]),
                         win='gen_poem')

        t.save(model.state_dict(), '%s_%s.path' % (opt.model_prefix, epoch))
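A minimal usage sketch; every keyword matches an opt attribute referenced in the function above, and the values (environment name, hyperparameters, checkpoint prefix) are placeholders:

train(env='poetry',                     # visualization environment (placeholder)
      epoch=20,
      batch_size=128,
      lr=1e-3,
      plot_every=100,
      use_gpu=False,
      model_prefix='checkpoints/tang')  # placeholder checkpoint prefix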