Example #1
def main(mutations_file, data_dir):
    mutations = read_data(mutations_file)
    entity = mutations["entity"]

    cur_file = f"{data_dir}/{entity.lower()}.json"
    cur_data = read_data(cur_file)

    new_data = process_data(cur_data=cur_data, mutations=mutations)

    out_file = cur_file
    write_data(new_data, out_file)
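Example #1 (and the similar mains in Examples #2, #4 and #7) calls read_data / write_data without showing them. A minimal sketch of that I/O pair, assuming the files are plain JSON; the real implementations live in the source project, and process_data is deliberately not guessed at:

import json

def read_data(path):
    # assumed behavior: load one JSON document from disk
    with open(path) as f:
        return json.load(f)

def write_data(data, path):
    # assumed behavior: serialize the result back to JSON
    with open(path, "w") as f:
        json.dump(data, f, indent=2)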
Example #2
def main(new_file, cur_dir, out_dir):
    new_data = read_data(new_file)
    entity = new_data["entity"]

    cur_file = f"{cur_dir}/{entity.lower()}.json"
    cur_data = read_data(cur_file)

    mutations = process_data(cur_data=cur_data, new_data=new_data)

    out_file = f"{out_dir}/mutations.json"
    write_data(mutations, out_file)
Example #3
def lstm_classifier(data_dir, label_dir, to_dir):
    # Build a Keras LSTM classifier
    
    batch_size = 256

    # Read in the local data with Shakespearean content
    x_train, y_train, x_valid, y_valid, vocab = reader.read_data(data_dir, label_dir)

    # Create a linear stack of layers
    model = Sequential()
    model.add(Embedding(len(vocab)+1, 300, input_length=15))
    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    # Sigmoid keeps the output in [0, 1], which binary_crossentropy expects
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=5)

    score = model.evaluate(x_valid, y_valid, batch_size=batch_size)
    print('validation loss:', score[0])
    print('validation accuracy:', score[1])
    
    # Save the model
    model.save(to_dir)
    
    return history
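A hypothetical invocation of the trainer above; the paths are illustrative, not taken from the original project:

history = lstm_classifier(data_dir='data/shakespeare',
                          label_dir='data/labels',
                          to_dir='models/shakespeare_lstm.h5')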
Example #4
def main(input_dir, output_dir, entity):
    cur_file = f"{input_dir}/{entity.lower()}.json"
    cur_data = read_data(cur_file)

    labels, lines = process_data(cur_data=cur_data)

    write_data(labels, lines, output_dir)
Example #5
def main(_):
    if not FLAGS.data_path:
        raise ValueError('Must set --data_path')
    print ' '.join(sys.argv)

    config = Config()
    train, dev, word2id, tag2id = \
      reader.read_data(FLAGS.data_path, config.batch_size)
    id2word = sorted(word2id, key=word2id.get)
    id2tag = sorted(tag2id, key=tag2id.get)
    config.word_vocab_size, config.tag_vocab_size = len(word2id), len(tag2id)

    print 'batch_size: %d' % config.batch_size
    print 'init_scale: %.2f' % config.init_scale
    print 'keep_prob: %.2f' % config.keep_prob
    print 'learning_rate: %.5f' % config.learning_rate
    print 'lr_decay: %.2f' % config.lr_decay
    print 'max_epoch: %d' % config.max_epoch
    print 'max_grad_norm: %d' % config.max_grad_norm
    print 'max_max_epoch: %d' % config.max_max_epoch
    print 'num_layers: %d' % config.num_layers
    print 'rnn_size: %d' % config.rnn_size
    print 'use_peepholes: %r' % config.use_peepholes
    sys.stdout.flush()

    with tf.Graph().as_default(), tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = Model(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            m_dev = Model(is_training=False, config=config)
        tf.initialize_all_variables().run()

        prev = float('inf')
        lr_decay = 1.
        for i in xrange(config.max_max_epoch):
            start_time = time.time()
            shuffle(train)
            # lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(sess, config.learning_rate * lr_decay)
            print 'epoch: %d learning rate: %.3e' % (i + 1, sess.run(m.lr))

            train_loss, train_perp, train_acc = \
              run_epoch(sess, m, train, m.train_op, verbose=True)
            print '%d, train loss: %.2f, perp: %.4f, acc: %.2f' \
              % (i+1, train_loss, train_perp, train_acc)

            dev_loss, dev_perp, dev_acc = run_epoch(sess, m_dev, dev,
                                                    tf.no_op())
            print '%d, dev loss: %.2f, perp: %.4f, acc: %.2f' % \
              (i+1, dev_loss, dev_perp, dev_acc)

            if prev < dev_loss:
                lr_decay *= config.lr_decay
            prev = dev_loss

            print 'it took %.2f seconds' % (time.time() - start_time)
            sys.stdout.flush()
Example #6
def load_model_(data_dir, label_dir, model_dir):
    # load and evaluate a saved model ("model_dir" so the dir() builtin is not shadowed)
    model = load_model(model_dir)
    # summarize model
    print(model.summary())
    _, _, x_valid, y_valid, _ = reader.read_data(data_dir, label_dir)
    score = model.evaluate(x_valid, y_valid)
    print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))  # acc: 94.85%
Example #7
def main(input_dir, output_dir, entity):
    cur_file = f"{input_dir}/{entity.lower()}.json"
    cur_data = read_data(cur_file)

    new_data = process_data(cur_data=cur_data)

    out_file = f"{output_dir}/{entity.lower()}.json"
    write_data(new_data, out_file)
Example #8
    def test_reader(self):
        target = [{
            'src': '13.43.13.123',
            'dst': '85.123.34.1'
        }, {
            'src': '45.14.153.12',
            'dst': '198.12.155.62'
        }]
        file = 'test.pcap'
        self.assertEqual(target, reader.read_data(file))
Example #9
def main(args):
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    torch.backends.cudnn.benchmark = True

    # Data loading code
    print("Loading data")
    # traindir = os.path.join(args.data_path, 'train.txt')
    # valdir = os.path.join(args.data_path, 'val.txt')
    # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                                  std=[0.229, 0.224, 0.225])

    traindir = r'./datasets/Corel5k/train.txt'
    valdir = r'./datasets/Corel5k/val.txt'

    print("Creating data loaders")

    data_loader, data_loader_test = reader.read_data(traindir=traindir, valdir=valdir,
                                                     batch_size=args.batch_size, num_works=args.workers)

    print("Creating model")
    model = models.__dict__[args.model](pretrained=True)
    model.to(device)
    if args.distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    criterion = nn.BCELoss()

    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])

    if args.test_only:
        evaluate(model, criterion, data_loader, device=device)
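        # feat_test is not defined in this snippet; the source project presumably builds it inside evaluate()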
        sio.savemat('feat_train.mat', {'feat_train': feat_test})
        return
Example #10
def plot_image(path):
    data = reader.read_data(path)
    fig = matplotlib.pyplot.figure(figsize=(16, 16))
    ax = fig.add_subplot(111)

    def animate(i):
        im = ax.imshow(np.flipud(data[:, :, i].transpose()), cmap='viridis')
        return [im]

    return matplotlib.animation.FuncAnimation(fig,
                                              animate,
                                              frames=data.shape[2],
                                              interval=200,
                                              blit=True)
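A hedged usage sketch for the animation above: the returned FuncAnimation must be kept alive and then saved or shown. The input path is hypothetical, and writing an mp4 assumes ffmpeg is installed:

anim = plot_image('scan.dat')  # hypothetical input file
anim.save('scan.mp4', fps=5)   # needs ffmpeg on the PATH
matplotlib.pyplot.show()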
Example #11
    def __init__(self, info):
        from reader import read_data

        config = read_data(info)

        self.shape = config['shape']
        self.x_o1, self.y0, self.z_r = config['x_o1'], config['y0'], config['z_r']
        self.z_o, self.z_r = config['z_o'], config['z_r']
        self.S_o, self.S_b = config['S_o'], config['S_b']
        self.w_b = config['w_b']
        self.R_br, self.R_lb, self.R_bt = config['R_br'], config['R_lb'], config['R_bt']
        self.cell_size = config['cell_size']

        self.set_params()
Example #12
def find_R(nazir_ip, mix_ip, m, pcap_file):
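    # Collect candidate recipient sets R_i: once nazir is seen sending into
    # the mix, every destination the mix forwards to joins the current set,
    # and a packet not coming from the mix closes it off (m is unused here).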
    R_list          = []
    nazir_sending   = False
    current_ri      = []

    for packet in reader.read_data(pcap_file):
        if not packet['src'] == mix_ip and current_ri:
            R_list += [set(current_ri)]
            current_ri = []
            nazir_sending = False

        if packet['dst'] == mix_ip and packet['src'] == nazir_ip:
            nazir_sending = True
        elif packet['src'] == mix_ip and nazir_sending:
            current_ri += [packet['dst']]

    return R_list
Example #13
def sample(FLAGS, n=10):
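    # Restore a trained QAModelN2N from checkpoint_dir and print its predictions for n test items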
    data_path = os.path.join(FLAGS.data_dir, FLAGS.data_name + "test.txt")

    with open(os.path.join(FLAGS.checkpoint_dir, FLAGS.data_name + ".config"),
              "rb") as f:
        config = pkl.load(f)

    FLAGS.mem_size = config["mem_size"]
    FLAGS.nwords = config["nwords"]
    FLAGS.vocab = config["vocab"]

    with tf.Session() as sess:
        m = QAModelN2N(FLAGS, sess)
        m.build_model()
        m.load(FLAGS.checkpoint_dir)
        generator = read_data(data_path, m.vocab)
        rev_vocab = {v: k for k, v in m.vocab.iteritems()}
        for i, (x, q, a) in enumerate(generator):
            if i == n: break
            print("CONTEXT: " + " ".join([rev_vocab[xi] for xi in x]))
            print("QUESTION: " + " ".join([rev_vocab[xi] for xi in q]))
            print("PREDICTED ANSWER: " + m.sample(x, q, rev_vocab))
            print("ACTUAL ANSWER: " + rev_vocab[a[0]])
            print("=" * 80)
Example #14
def see(ls, ext, path):
    index = 0
    key = 0
    slc = 0
    while key != 'q':
        key = 0
        data = reader.read_data(path + ls[index] + ext)
        # axes after transpose: (horizontal(?), depth front->back, height floor->ceil)
        data = np.flipud(data.transpose([1, 0, 2]))
        data /= np.max(data)
        while key not in ['q', 'n', 'p']:
            cv2.imshow('viewer', data[:, :, slc])
            key = chr(cv2.waitKey() % 256)
            if key in ['n', 'p']:
                continue
            elif key == 'b':
                slc = (slc - 1 + data.shape[2]) % data.shape[2]
            else:
                slc = (slc + 1) % data.shape[2]
        if key == 'n':
            index += 1
        elif key == 'p':
            index -= 1
        index = max(0, min(index, len(ls) - 1))
Example #15
File: model.py  Project: BillPei/MemN2N-1
    def train(self, data_path, epochs=100):
        merged_sum = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter("./logs/{}".format(self.get_model_name()),
                                        self.sess.graph)

        data_size = 1
        for epoch in xrange(epochs):
            generator = read_data(data_path, self.vocab)
            lr = self.init_lr
            if epoch % 25 == 0 and 0 < epoch < 100:
                lr /= 2.
            for step,(x,q,a) in enumerate(generator):
                _, loss, summary = self.sess.run(
                    [self.optim, self.loss, merged_sum],
                    feed_dict={self.inputs: x, self.q: q, self.a: a, self.lr: lr})

                if step % 10 == 0:
                    print("Epoch: {}, Step: {}, loss: {}".format(epoch,
                          epoch*data_size + step, loss))
                if step % 2 == 0:
                    writer.add_summary(summary, epoch*data_size + step)
                if step % 500 == 0:
                    self.save(global_step=step)
            data_size = step + 1
Example #16
def main(_):
    if os.path.exists(config.forward_log_path) and config.mode == 'forward':
        os.system('rm ' + config.forward_log_path)
    if os.path.exists(config.backward_log_path) and config.mode == 'backward':
        os.system('rm ' + config.backward_log_path)
    if os.path.exists(config.use_output_path):
        os.system('rm ' + config.use_output_path)
    for item in config.record_time:
        if os.path.exists(config.use_output_path + str(item)):
            os.system('rm ' + config.use_output_path + str(item))
    if os.path.exists(config.use_log_path):
        os.system('rm ' + config.use_log_path)
    if config.mode == 'forward' or config.mode == 'use':
        with tf.name_scope("forward_train"):
            with tf.variable_scope("forward", reuse=None):
                m_forward = PTBModel(is_training=True)
        with tf.name_scope("forward_test"):
            with tf.variable_scope("forward", reuse=True):
                mtest_forward = PTBModel(is_training=False)
        var = tf.trainable_variables()
        var_forward = [x for x in var if x.name.startswith('forward')]
        saver_forward = tf.train.Saver(var_forward, max_to_keep=1)
    if config.mode == 'backward' or config.mode == 'use':
        with tf.name_scope("backward_train"):
            with tf.variable_scope("backward", reuse=None):
                m_backward = PTBModel(is_training=True)
        with tf.name_scope("backward_test"):
            with tf.variable_scope("backward", reuse=True):
                mtest_backward = PTBModel(is_training=False)
        var = tf.trainable_variables()
        var_backward = [x for x in var if x.name.startswith('backward')]
        saver_backward = tf.train.Saver(var_backward, max_to_keep=1)

    init = tf.global_variables_initializer()

    with tf.Session() as session:
        session.run(init)
        if config.mode == 'forward':
            #train forward language model
            train_data, test_data = reader.read_data(config.data_path,
                                                     config.num_steps)
            test_mean_old = 15.0

            for epoch in range(config.max_epoch):
                train_ppl_list = []
                test_ppl_list = []
                for i in range(train_data.length // config.batch_size):
                    input, sequence_length, target = train_data(
                        m_forward.batch_size, i)
                    train_perplexity = run_epoch(session,
                                                 m_forward,
                                                 input,
                                                 sequence_length,
                                                 target,
                                                 mode='train')
                    train_ppl_list.append(train_perplexity)
                    print("Epoch:%d, Iter: %d Train NLL: %.3f" %
                          (epoch, i + 1, train_perplexity))
                for i in range(test_data.length // config.batch_size):
                    input, sequence_length, target = test_data(
                        mtest_forward.batch_size, i)
                    test_perplexity = run_epoch(session,
                                                mtest_forward,
                                                input,
                                                sequence_length,
                                                target,
                                                mode='test')
                    test_ppl_list.append(test_perplexity)
                    print("Epoch:%d, Iter: %d Test NLL: %.3f" %
                          (epoch, i + 1, test_perplexity))
                test_mean = np.mean(test_ppl_list)
                if test_mean < test_mean_old:
                    test_mean_old = test_mean
                    saver_forward.save(session, config.forward_save_path)
                write_log(
                    'train ppl:' + str(np.mean(train_ppl_list)) + '\t' +
                    'test ppl:' + str(test_mean), config.forward_log_path)

        if config.mode == 'backward':
            #train backward language model
            train_data, test_data = reader.read_data(config.data_path,
                                                     config.num_steps)
            test_mean_old = 15.0
            for epoch in range(config.max_epoch):
                train_ppl_list = []
                test_ppl_list = []

                for i in range(train_data.length // config.batch_size):
                    input, sequence_length, target = train_data(
                        m_backward.batch_size, i)
                    input, sequence_length, target = reverse_seq(
                        input, sequence_length, target)
                    train_perplexity = run_epoch(session,
                                                 m_backward,
                                                 input,
                                                 sequence_length,
                                                 target,
                                                 mode='train')
                    train_ppl_list.append(train_perplexity)
                    print("Epoch:%d, Iter: %d Train NLL: %.3f" %
                          (epoch, i + 1, train_perplexity))
                for i in range(test_data.length // config.batch_size):
                    input, sequence_length, target = test_data(
                        mtest_backward.batch_size, i)
                    input, sequence_length, target = reverse_seq(
                        input, sequence_length, target)
                    test_perplexity = run_epoch(session,
                                                mtest_backward,
                                                input,
                                                sequence_length,
                                                target,
                                                mode='test')
                    test_ppl_list.append(test_perplexity)
                    print("Epoch:%d, Iter: %d Test NLL: %.3f" %
                          (epoch, i + 1, test_perplexity))
                test_mean = np.mean(test_ppl_list)
                if test_mean < test_mean_old:
                    test_mean_old = test_mean
                    saver_backward.save(session, config.backward_save_path)
                write_log(
                    'train ppl:' + str(np.mean(train_ppl_list)) + '\t' +
                    'test ppl:' + str(test_mean), config.backward_log_path)

        if config.mode == 'use':
            #CGMH sampling for sentence_correction
            sim = config.sim
            sta_vec = list(np.zeros([config.num_steps - 1]))

            saver_forward.restore(session, config.forward_save_path)
            saver_backward.restore(session, config.backward_save_path)
            config.shuffle = False
            #erroneous sentence input
            if config.keyboard_input == True:
                #input from keyboard if key_input is not empty
                key_input = raw_input('please input a sentence\n')
                if key_input == '':
                    use_data = reader.read_data_use(config.use_data_path,
                                                    config.num_steps)
                else:
                    sta_vec_list = [sen2sta_vec(key_input)]
                    key_input = key_input.split()
                    #key_input=sen2id(key_input)
                    use_data = [key_input]
            else:
                #load keywords from file
                use_data = []
                with open(config.use_data_path) as f:
                    for line in f:
                        use_data.append(line.strip().split())
            config.batch_size = 1

            for sen_id in range(len(use_data)):
                #generate for each sentence
                input_ = use_data[sen_id]
                pos = 0

                for iter in range(config.sample_time):
                    #ind is the index of the selected word, regardless of the beginning token.
                    sta_vec = sen2sta_vec(' '.join(input_))
                    input__ = reader.array_data([sen2id(input_)],
                                                config.num_steps,
                                                config.dict_size)
                    input, sequence_length, _ = input__(1, 0)
                    input_original = input[0]

                    ind = pos % (sequence_length[0] - 1)
                    print(' '.join(input_))

                    if iter in config.record_time:
                        with open(config.use_output_path + str(iter),
                                  'a') as g:
                            g.write(' '.join(input_) + '\n')

                    if True:
                        prob_old = run_epoch(session,
                                             mtest_forward,
                                             input,
                                             sequence_length,
                                             mode='use')
                        if config.double_LM == True:
                            input_backward, _, _ = reverse_seq(
                                input, sequence_length, input)
                            prob_old = (prob_old + run_epoch(session,
                                                             mtest_backward,
                                                             input_backward,
                                                             sequence_length,
                                                             mode='use')) * 0.5

                        tem = 1
                        for j in range(sequence_length[0] - 1):
                            tem *= prob_old[0][j][input[0][j + 1]]
                        tem *= prob_old[0][j + 1][config.dict_size + 1]
                        prob_old_prob = tem

                        if sim != None:
                            similarity_old = similarity(
                                input[0], input_original)
                            prob_old_prob *= similarity_old
                        else:
                            similarity_old = -1

                        input_candidate_ = generate_change_candidate(
                            input_, ind)
                        tem = reader.array_data(
                            [sen2id(x) for x in input_candidate_],
                            config.num_steps, config.dict_size)
                        input_candidate, sequence_length_candidate, _ = tem(
                            len(input_candidate_), 0)

                        prob_candidate_pre = run_epoch(
                            session,
                            mtest_forward,
                            input_candidate,
                            sequence_length_candidate,
                            mode='use')
                        if config.double_LM == True:
                            input_candidate_backward, _, _ = reverse_seq(
                                input_candidate, sequence_length_candidate,
                                input_candidate)
                            prob_candidate_pre = (
                                prob_candidate_pre +
                                run_epoch(session,
                                          mtest_backward,
                                          input_candidate_backward,
                                          sequence_length_candidate,
                                          mode='use')) * 0.5
                        prob_candidate = []
                        for i in range(len(input_candidate_)):
                            tem = 1
                            for j in range(sequence_length[0] - 1):
                                tem *= prob_candidate_pre[i][j][
                                    input_candidate[i][j + 1]]
                            tem *= prob_candidate_pre[i][j +
                                                         1][config.dict_size +
                                                            1]
                            prob_candidate.append(tem)

                        prob_candidate = np.array(prob_candidate)
                        if sim != None:
                            similarity_candidate = similarity_batch(
                                input_candidate, input_original)
                            prob_candidate = prob_candidate * similarity_candidate
                        prob_candidate_norm = normalize(prob_candidate)
                        prob_candidate_ind = sample_from_candidate(
                            prob_candidate_norm)
                        prob_change_prob = prob_candidate[prob_candidate_ind]
                        input_change_ = input_candidate_[prob_candidate_ind]

                    #word replacement (action: 0)
                    if True:
                        if False:
                            pass
                        else:
                            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(
                                input, sequence_length, ind, mode=0)
                            prob_forward = run_epoch(
                                session,
                                mtest_forward,
                                input_forward,
                                sequence_length_forward,
                                mode='use')[0,
                                            ind % (sequence_length[0] - 1), :]
                            prob_backward = run_epoch(
                                session,
                                mtest_backward,
                                input_backward,
                                sequence_length_backward,
                                mode='use')[0, sequence_length[0] - 1 - ind %
                                            (sequence_length[0] - 1), :]
                            prob_mul = (prob_forward * prob_backward)
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input,
                                sequence_length,
                                ind,
                                prob_mul,
                                config.search_size,
                                mode=1)
                            prob_candidate_pre = run_epoch(
                                session,
                                mtest_forward,
                                input_candidate,
                                sequence_length_candidate,
                                mode='use')
                            if config.double_LM == True:
                                input_candidate_backward, _, _ = reverse_seq(
                                    input_candidate, sequence_length_candidate,
                                    input_candidate)
                                prob_candidate_pre = (
                                    prob_candidate_pre +
                                    run_epoch(session,
                                              mtest_backward,
                                              input_candidate_backward,
                                              sequence_length_candidate,
                                              mode='use')) * 0.5

                            prob_candidate = []
                            for i in range(config.search_size):
                                tem = 1
                                for j in range(sequence_length_candidate[0] -
                                               1):
                                    tem *= prob_candidate_pre[i][j][
                                        input_candidate[i][j + 1]]
                                tem *= prob_candidate_pre[i][j + 1][
                                    config.dict_size + 1]
                                prob_candidate.append(tem)
                            prob_candidate = np.array(prob_candidate)
                            if config.sim_word == True:
                                similarity_candidate = similarity_batch(
                                    input_candidate[:, ind + 1:ind + 2],
                                    input_original[ind + 1:ind + 2])
                                prob_candidate = prob_candidate * similarity_candidate
                            prob_candidate_norm = normalize(prob_candidate)

                            prob_candidate_ind = sample_from_candidate(
                                prob_candidate_norm)
                            prob_candidate_prob = prob_candidate[
                                prob_candidate_ind]

                            prob_changeanother_prob = prob_candidate_prob
                            word = id2sen(
                                input_candidate[prob_candidate_ind])[ind]
                            input_changeanother_ = input_[:ind] + [
                                word
                            ] + input_[ind + 1:]

                    #word insertion(action:1)
                    if True:
                        if sequence_length[0] >= config.num_steps:
                            prob_add_prob = 0
                            pass
                        else:
                            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(
                                input, sequence_length, ind, mode=1)
                            prob_forward = run_epoch(
                                session,
                                mtest_forward,
                                input_forward,
                                sequence_length_forward,
                                mode='use')[0,
                                            ind % (sequence_length[0] - 1), :]
                            prob_backward = run_epoch(
                                session,
                                mtest_backward,
                                input_backward,
                                sequence_length_backward,
                                mode='use')[0, sequence_length[0] - 1 - ind %
                                            (sequence_length[0] - 1), :]
                            prob_mul = (prob_forward * prob_backward)
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input,
                                sequence_length,
                                ind,
                                prob_mul,
                                config.search_size,
                                mode=1)
                            prob_candidate_pre = run_epoch(
                                session,
                                mtest_forward,
                                input_candidate,
                                sequence_length_candidate,
                                mode='use')
                            if config.double_LM == True:
                                input_candidate_backward, _, _ = reverse_seq(
                                    input_candidate, sequence_length_candidate,
                                    input_candidate)
                                prob_candidate_pre = (
                                    prob_candidate_pre +
                                    run_epoch(session,
                                              mtest_backward,
                                              input_candidate_backward,
                                              sequence_length_candidate,
                                              mode='use')) * 0.5

                            prob_candidate = []
                            for i in range(config.search_size):
                                tem = 1
                                for j in range(sequence_length_candidate[0] -
                                               1):
                                    tem *= prob_candidate_pre[i][j][
                                        input_candidate[i][j + 1]]
                                tem *= prob_candidate_pre[i][j + 1][
                                    config.dict_size + 1]
                                prob_candidate.append(tem)
                            prob_candidate = np.array(prob_candidate)
                            #similarity_candidate=np.array([similarity(x, input_original) for x in input_candidate])
                            if sim != None:
                                similarity_candidate = similarity_batch(
                                    input_candidate, input_original)
                                prob_candidate = prob_candidate * similarity_candidate
                            prob_candidate_norm = normalize(prob_candidate)

                            prob_candidate_ind = sample_from_candidate(
                                prob_candidate_norm)
                            prob_candidate_prob = prob_candidate[
                                prob_candidate_ind]

                            prob_add_prob = prob_candidate_prob
                            word = id2sen(
                                input_candidate[prob_candidate_ind])[ind]
                            input_add_ = input_[:ind] + [word] + input_[ind:]

                    # word deletion (action: 2)
                    if True:
                        if sequence_length[0] <= 2:
                            prob_delete_prob = 0
                            pass
                        else:
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input,
                                sequence_length,
                                ind,
                                None,
                                config.search_size,
                                mode=2)
                            prob_new = run_epoch(session,
                                                 mtest_forward,
                                                 input_candidate,
                                                 sequence_length_candidate,
                                                 mode='use')
                            tem = 1
                            for j in range(sequence_length_candidate[0] - 1):
                                tem *= prob_new[0][j][input_candidate[0][j +
                                                                         1]]
                            tem *= prob_new[0][j + 1][config.dict_size + 1]
                            prob_new_prob = tem
                            if sim != None:
                                similarity_new = similarity_batch(
                                    input_candidate, input_original)
                                prob_new_prob = prob_new_prob * similarity_new
                            prob_delete_prob = prob_new_prob
                        input_delete_ = input_[:ind] + input_[ind + 1:]
                    b = np.argmax([
                        prob_old_prob, prob_change_prob,
                        prob_changeanother_prob * 0.3, prob_add_prob * 0.1,
                        prob_delete_prob * 0.001
                    ])
                    print([
                        prob_old_prob, prob_change_prob,
                        prob_changeanother_prob, prob_add_prob,
                        prob_delete_prob
                    ])
                    print([
                        input_, input_change_, input_changeanother_,
                        input_add_, input_delete_
                    ])
                    input_ = [
                        input_, input_change_, input_changeanother_,
                        input_add_, input_delete_
                    ][b]
                    pos += 1
Example #17
#!/usr/bin/env python

import reader
import sys
import pypsignifit as pf
import pypsignifit.psignipriors as pfp
import pylab as pl
import numpy as np
import swignifit.swignifit_raw as sfr
import integrate as ig
# import pypsignifit.psigniplot as pp

d, s = reader.read_data(sys.argv[1])
d = np.array(d)
# stimulus_intensities = [0.0,2.0,4.0,6.0,8.0,10.0]
# number_of_correct = [34,32,40,48,50,48]
# number_of_trials  = [50]*len(stimulus_intensities)
# data = zip(stimulus_intensities,number_of_correct,number_of_trials)
# d = np.array ( data )

model = {'nafc':1, 'sigmoid':"logistic", 'core':'mw0.1'}
m = 4.0
w = 4.0
l = 0.05
g = 0.02

priors = ["Gauss(%f,%f)" % (m, m), "Gauss(%f,%f)" % (w, w*2), "Beta(2,50)", "Beta(1,50)"]
# priors = (pfp.default_mid(d[:,0])[0],"Gamma(2,4)",pfp.default_lapse(),pfp.default_lapse())
# priors = ("Gauss(4,.1)","Gauss(4,.1)","Beta(2,50)","Beta(1,50)")
print priors
x,fx,priors = ig.integration_grid ( d )
Example #18
from keras.callbacks import CSVLogger, EarlyStopping, ModelCheckpoint
from keras.layers import Dense
from keras.models import Sequential

from reader import read_data


def get_model():
    my_model = Sequential()
    my_model.add(Dense(1024, activation="relu", input_shape=(1152, )))
    my_model.add(Dense(2048, activation="relu"))
    my_model.add(Dense(4096, activation="relu"))
    my_model.add(Dense(3862, activation="sigmoid"))
    my_model.compile("adam", loss="binary_crossentropy", metrics=["accuracy"])
    return my_model


train = read_data("datasets/video_sample/train00.tfrecord")
validation = read_data("datasets/video_sample/train01.tfrecord")

model = get_model()
early_stop = EarlyStopping(patience=4, monitor='val_loss')
checkpoint = ModelCheckpoint(
    "weights.h5",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
csv_logger = CSVLogger('v.csv')
model.fit_generator(train,
                    steps_per_epoch=50,
                    epochs=50,
                    validation_data=validation,
                    validation_steps=10,  # assumed; required when validating from a generator
                    callbacks=[early_stop, checkpoint, csv_logger])
Example #19
from reader import read_data

dataset,vocab = read_data()

print vocab
Example #20
#!/usr/bin/env python

from __future__ import print_function

from reader import read_data
import numpy as np


def intersect(a, b):
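    # Axis-aligned overlap test: two rectangles intersect unless one lies
    # entirely to the left/right of, or above/below, the other.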
    if a.right < b.left or b.right < a.left:
        return False
    if a.top < b.bottom or b.top < a.bottom:
        return False
    return True


with open('data.txt', 'r') as f:
    data = read_data(f)

if False:
    test = ('#1 @ 1,3: 4x4', '#2 @ 3,1: 4x4', '#3 @ 5,5: 2x2')
    data = read_data(test)

#print(data)
for i, a in enumerate(data):
    if all(map(lambda x: not intersect(a, x), data[:i] + data[i + 1:])):
        answer = a.id

print('Answer:', answer)
# 346
Example #21
File: paraphrase.py  Project: zhouh/CGMH
def main(_):
  if os.path.exists(config.forward_log_path) and config.mode=='forward':
    os.system('rm '+config.forward_log_path)
  if os.path.exists(config.backward_log_path) and config.mode=='backward':
    os.system('rm '+config.backward_log_path)
  if os.path.exists(config.use_output_path):
    os.system('rm '+config.use_output_path)
  for item in config.record_time:
    if os.path.exists(config.use_output_path+str(item)):
      os.system('rm '+config.use_output_path+str(item))
  if os.path.exists(config.use_log_path):
    os.system('rm '+config.use_log_path)
  if config.mode=='forward' or config.mode=='use':
    with tf.name_scope("forward_train"):
      with tf.variable_scope("forward", reuse=None):
        m_forward = PTBModel(is_training=True)
    with tf.name_scope("forward_test"):
      with tf.variable_scope("forward", reuse=True):
        mtest_forward = PTBModel(is_training=False)
    var=tf.trainable_variables()
    var_forward=[x for x in var if x.name.startswith('forward')]
    saver_forward=tf.train.Saver(var_forward, max_to_keep=1)
  if config.mode=='backward' or config.mode=='use':
    with tf.name_scope("backward_train"):
      with tf.variable_scope("backward", reuse=None):
        m_backward = PTBModel(is_training=True)
    with tf.name_scope("backward_test"):
      with tf.variable_scope("backward", reuse=True):
        mtest_backward = PTBModel(is_training=False)
    var=tf.trainable_variables()
    var_backward=[x for x in var if x.name.startswith('backward')]
    saver_backward=tf.train.Saver(var_backward, max_to_keep=1)
    
  init = tf.global_variables_initializer()
  

  with tf.Session() as session:
    session.run(init)
    if config.mode=='forward':
      train_data, test_data = reader.read_data(config.data_path, config.num_steps)
      test_mean_old=15.0
      
      for epoch in range(config.max_epoch):
        train_ppl_list=[]
        test_ppl_list=[]
        for i in range(train_data.length//config.batch_size):
          input, sequence_length, target=train_data(m_forward.batch_size, i)
          train_perplexity = run_epoch(session, m_forward,input, sequence_length, target, mode='train')
          train_ppl_list.append(train_perplexity)
          print("Epoch:%d, Iter: %d Train NLL: %.3f" % (epoch, i + 1, train_perplexity))
        for i in range(test_data.length//config.batch_size):
          input, sequence_length, target=test_data(mtest_forward.batch_size, i)
          test_perplexity = run_epoch(session, mtest_forward, input, sequence_length, target, mode='test')
          test_ppl_list.append(test_perplexity)
          print("Epoch:%d, Iter: %d Test NLL: %.3f" % (epoch, i + 1, test_perplexity))
        test_mean=np.mean(test_ppl_list)
        if test_mean<test_mean_old:
          test_mean_old=test_mean
          saver_forward.save(session, config.forward_save_path)
        write_log('train ppl:'+str(np.mean(train_ppl_list))+'\t'+'test ppl:'+str(test_mean), config.forward_log_path)
    
    if config.mode=='backward':
      train_data, test_data = reader.read_data(config.data_path, config.num_steps)
      test_mean_old=15.0
      for epoch in range(config.max_epoch):
        train_ppl_list=[]
        test_ppl_list=[]
      
        for i in range(train_data.length//config.batch_size):
          input, sequence_length, target=train_data(m_backward.batch_size, i)
          input, sequence_length, target=reverse_seq(input, sequence_length, target)
          train_perplexity = run_epoch(session, m_backward,input, sequence_length, target, mode='train')
          train_ppl_list.append(train_perplexity)
          print("Epoch:%d, Iter: %d Train NLL: %.3f" % (epoch, i + 1, train_perplexity))
        for i in range(test_data.length//config.batch_size):
          input, sequence_length, target=test_data(mtest_backward.batch_size, i)
          input, sequence_length, target=reverse_seq(input, sequence_length, target)
          test_perplexity = run_epoch(session, mtest_backward, input, sequence_length, target, mode='test')
          test_ppl_list.append(test_perplexity)
          print("Epoch:%d, Iter: %d Test NLL: %.3f" % (epoch, i + 1, test_perplexity))
        test_mean=np.mean(test_ppl_list)
        if test_mean<test_mean_old:
          test_mean_old=test_mean
          saver_backward.save(session, config.backward_save_path)
        write_log('train ppl:'+str(np.mean(train_ppl_list))+'\t'+'test ppl:'+str(test_mean), config.backward_log_path)
  
    if config.mode=='use':
      sim=config.sim
      #keyword stable
      sta_vec=list(np.zeros([config.num_steps-1]))

      saver_forward.restore(session, config.forward_save_path)
      saver_backward.restore(session, config.backward_save_path)
      config.shuffle=False
      if config.keyboard_input==True:
        key_input=raw_input('please input a sentence in lower case\n')
        if key_input=='':
          use_data = reader.read_data_use(config.use_data_path, config.num_steps)
        else:
          key_input=key_input.split()
          key_input=sen2id(key_input)
          use_data = reader.array_data([key_input], config.num_steps, config.dict_size)
      else:
        use_data, sta_vec_list = reader.read_data_use(config.use_data_path, config.num_steps)
      config.batch_size=1
      #use_data.length=1 #######################################
      for sen_id in range(use_data.length):
        if config.keyboard_input==False:
          sta_vec=sta_vec_list[sen_id%len(sta_vec)]
        print(sta_vec)
        input, sequence_length, _=use_data(1, sen_id)
        input_original=input[0]
        for i in range(1,config.num_steps):
          if input[0][i]>config.rare_since and  input[0][i]<config.dict_size:
            sta_vec[i-1]=1
        pos=0

        for iter in range(config.sample_time):
        #ind is the index of the selected word, regardless of the beginning token.
          
          ind=pos%(sequence_length[0]-1)
          action=choose_action(config.action_prob)
          #tem
          print(' '.join(id2sen(input[0])))
          if iter in config.record_time:
            with open(config.use_output_path+str(iter), 'a') as g:
              g.write(' '.join(id2sen(input[0]))+'\n')
          #tem_end
          #print(sta_vec, sequence_length[0], ind)
          '''
          if sta_vec[ind]==1 and action in [0, 2]:                  #stop skipping words
            action=3
          '''
        #change word
          if action==0: 
            prob_old=run_epoch(session, mtest_forward, input, sequence_length, mode='use')
            if config.double_LM==True:
              input_backward, _, _ =reverse_seq(input, sequence_length, input)
              prob_old=(prob_old+run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use'))*0.5

            tem=1
            for j in range(sequence_length[0]-1):
              tem*=prob_old[0][j][input[0][j+1]]
            tem*=prob_old[0][j+1][config.dict_size+1]
            prob_old_prob=tem
            if sim!=None:
              similarity_old=similarity(input[0], input_original, sta_vec)
              prob_old_prob*=similarity_old
            else:
              similarity_old=-1
            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(input, sequence_length, ind, mode=action)
            prob_forward=run_epoch(session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind%(sequence_length[0]-1),:]
            prob_backward=run_epoch(session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, sequence_length[0]-1-ind%(sequence_length[0]-1),:]
            prob_mul=(prob_forward*prob_backward)
            input_candidate, sequence_length_candidate=generate_candidate_input(input, sequence_length, ind, prob_mul, config.search_size, mode=action)
            prob_candidate_pre=run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use')
            if config.double_LM==True:
              input_candidate_backward, _, _ =reverse_seq(input_candidate, sequence_length_candidate, input_candidate)
              prob_candidate_pre=(prob_candidate_pre+run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use'))*0.5
            prob_candidate=[]
            for i in range(config.search_size):
              tem=1
              for j in range(sequence_length[0]-1):
                tem*=prob_candidate_pre[i][j][input_candidate[i][j+1]]
              tem*=prob_candidate_pre[i][j+1][config.dict_size+1]
              prob_candidate.append(tem)
          
            prob_candidate=np.array(prob_candidate)
            #similarity_candidate=np.array([similarity(x, input_original) for x in input_candidate])
            if sim!=None:
              similarity_candidate=similarity_batch(input_candidate, input_original,sta_vec)
              prob_candidate=prob_candidate*similarity_candidate
            prob_candidate_norm=normalize(prob_candidate)
            prob_candidate_ind=sample_from_candidate(prob_candidate_norm)
            prob_candidate_prob=prob_candidate[prob_candidate_ind]
            if input_candidate[prob_candidate_ind][ind+1]<config.dict_size and ( prob_candidate_prob>prob_old_prob*config.threshold or just_acc()==0):
              input=input_candidate[prob_candidate_ind:prob_candidate_ind+1]
            pos+=1
            #old_place=len(prob_mul)-list(np.argsort(prob_mul)).index(input[0][ind+1])
            #write_log('step:'+str(iter)+'action:0 prob_old:'+str(prob_old_prob)+' prob_new:'+str(prob_candidate_prob)+' '+str(old_place)+' '+str(sta_vec.index(1))+' '+str(ind), config.use_log_path)
            print('action:0', 1, prob_old_prob, prob_candidate_prob, prob_candidate_norm[prob_candidate_ind], similarity_old)

          #add word
          if action==1: 
            if sequence_length[0]>=config.num_steps:
              action=3
            else:
              input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(input, sequence_length, ind, mode=action)
              prob_forward=run_epoch(session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind%(sequence_length[0]-1),:]
              prob_backward=run_epoch(session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, sequence_length[0]-1-ind%(sequence_length[0]-1),:]
              prob_mul=(prob_forward*prob_backward)
              input_candidate, sequence_length_candidate=generate_candidate_input(input, sequence_length, ind, prob_mul, config.search_size, mode=action)
              prob_candidate_pre=run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use')
              if config.double_LM==True:
                input_candidate_backward, _, _ =reverse_seq(input_candidate, sequence_length_candidate, input_candidate)
                prob_candidate_pre=(prob_candidate_pre+run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use'))*0.5

              prob_candidate=[]
              for i in range(config.search_size):
                tem=1
                for j in range(sequence_length_candidate[0]-1):
                  tem*=prob_candidate_pre[i][j][input_candidate[i][j+1]]
                tem*=prob_candidate_pre[i][j+1][config.dict_size+1]
                prob_candidate.append(tem)
              prob_candidate=np.array(prob_candidate)
              #similarity_candidate=np.array([similarity(x, input_original) for x in input_candidate])
              if sim!=None:
                similarity_candidate=similarity_batch(input_candidate, input_original,sta_vec)
                prob_candidate=prob_candidate*similarity_candidate
              prob_candidate_norm=normalize(prob_candidate)

              prob_candidate_ind=sample_from_candidate(prob_candidate_norm)
              prob_candidate_prob=prob_candidate[prob_candidate_ind]

              prob_old=run_epoch(session, mtest_forward, input, sequence_length, mode='use')
              if config.double_LM==True:
                input_backward, _, _ =reverse_seq(input, sequence_length, input)
                prob_old=(prob_old+run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use'))*0.5

              tem=1
              for j in range(sequence_length[0]-1):
                tem*=prob_old[0][j][input[0][j+1]]
              tem*=prob_old[0][j+1][config.dict_size+1]
            
              prob_old_prob=tem
              if sim!=None:
                similarity_old=similarity(input[0], input_original,sta_vec)
                prob_old_prob=prob_old_prob*similarity_old
              else:
                similarity_old=-1
              alpha=min(1, prob_candidate_prob*config.action_prob[2]/(prob_old_prob*config.action_prob[1]*prob_candidate_norm[prob_candidate_ind]))
              #alpha=min(1, prob_candidate_prob*config.action_prob[2]/(prob_old_prob*config.action_prob[1]))
              print ('action:1',alpha, prob_old_prob,prob_candidate_prob, prob_candidate_norm[prob_candidate_ind], similarity_old)
            
              if choose_action([alpha, 1-alpha])==0 and input_candidate[prob_candidate_ind][ind]<config.dict_size and (prob_candidate_prob>prob_old_prob* config.threshold or just_acc()==0):
              #write_log('step:'+str(iter)+'action:1 prob_old:'+str(prob_old_prob)+' prob_new:'+str(prob_candidate_prob)+' '+str(sta_vec.index(1))+' '+str(ind), config.use_log_path)
                input=input_candidate[prob_candidate_ind:prob_candidate_ind+1]
                sequence_length+=1
                pos+=2
                sta_vec.insert(ind, 0.0)
                del(sta_vec[-1])
              else:
                action=3
       
       
        #delete word
          if action==2:
            if sequence_length[0]<=2:
              action=3
            else:

              prob_old=run_epoch(session, mtest_forward, input, sequence_length, mode='use')
              if config.double_LM==True:
                input_backward, _, _ =reverse_seq(input, sequence_length, input)
                prob_old=(prob_old+run_epoch(session, mtest_backward, input_backward, sequence_length, mode='use'))*0.5

              tem=1
              for j in range(sequence_length[0]-1):
                tem*=prob_old[0][j][input[0][j+1]]
              tem*=prob_old[0][j+1][config.dict_size+1]
              prob_old_prob=tem
              if sim!=None:
                similarity_old=similarity(input[0], input_original,sta_vec)
                prob_old_prob=prob_old_prob*similarity_old
              else:
                similarity_old=-1
              input_candidate, sequence_length_candidate=generate_candidate_input(input, sequence_length, ind, None , config.search_size, mode=2)
              prob_new=run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use')
              tem=1
              for j in range(sequence_length_candidate[0]-1):
                tem*=prob_new[0][j][input_candidate[0][j+1]]
              tem*=prob_new[0][j+1][config.dict_size+1]
              prob_new_prob=tem
              if sim!=None:
                similarity_new=similarity_batch(input_candidate, input_original,sta_vec)
                prob_new_prob=prob_new_prob*similarity_new
            
              input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(input, sequence_length, ind, mode=0)
              prob_forward=run_epoch(session, mtest_forward, input_forward, sequence_length_forward, mode='use')[0, ind%(sequence_length[0]-1),:]
              prob_backward=run_epoch(session, mtest_backward, input_backward, sequence_length_backward, mode='use')[0, sequence_length[0]-1-ind%(sequence_length[0]-1),:]
              prob_mul=(prob_forward*prob_backward)
              input_candidate, sequence_length_candidate=generate_candidate_input(input, sequence_length, ind, prob_mul, config.search_size, mode=0)
              prob_candidate_pre=run_epoch(session, mtest_forward, input_candidate, sequence_length_candidate, mode='use')
              if config.double_LM==True:
                input_candidate_backward, _, _ =reverse_seq(input_candidate, sequence_length_candidate, input_candidate)
                prob_candidate_pre=(prob_candidate_pre+run_epoch(session, mtest_backward, input_candidate_backward, sequence_length_candidate, mode='use'))*0.5

              prob_candidate=[]
              for i in range(config.search_size):
                tem=1
                for j in range(sequence_length[0]-1):
                  tem*=prob_candidate_pre[i][j][input_candidate[i][j+1]]
                tem*=prob_candidate_pre[i][j+1][config.dict_size+1]
                prob_candidate.append(tem)
              prob_candidate=np.array(prob_candidate)
            
              #similarity_candidate=np.array([similarity(x, input_original) for x in input_candidate])
              if sim!=None:
                similarity_candidate=similarity_batch(input_candidate, input_original,sta_vec)
                prob_candidate=prob_candidate*similarity_candidate
            
              ##### There is an unsolved problem here
              prob_candidate_norm=normalize(prob_candidate)
              if input[0] in input_candidate:
                for candidate_ind in range(len(input_candidate)):
                  if input[0] in input_candidate[candidate_ind: candidate_ind+1]:
                    break
                  pass
                alpha=min(prob_candidate_norm[candidate_ind]*prob_new_prob*config.action_prob[1]/(config.action_prob[2]*prob_old_prob), 1)
              else:
                pass
                alpha=0
              #alpha=min(prob_new_prob*config.action_prob[1]/(config.action_prob[2]*prob_old_prob), 1)
              print('action:2', alpha, prob_old_prob, prob_new_prob, prob_candidate_norm[candidate_ind], similarity_old)
             
              if choose_action([alpha, 1-alpha])==0 and (prob_new_prob> prob_old_prob*config.threshold or just_acc()==0):
                #write_log('step:'+str(iter)+'action:2 prob_old:'+str(prob_old_prob)+' prob_new:'+str(prob_new_prob)+' '+str(sta_vec.index(1))+' '+str(ind), config.use_log_path)
                input=np.concatenate([input[:,:ind+1], input[:,ind+2:], input[:,:1]*0+config.dict_size+1], axis=1)
                sequence_length-=1
                pos+=0
                del(sta_vec[ind])
                sta_vec.append(0)
              else:
                action=3
          #do nothing
          if action==3:
            #write_log('step:'+str(iter)+'action:3', config.use_log_path)
            pos+=1
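
The fragment above leans on helpers it never defines (normalize, choose_action, just_acc, similarity_batch). A minimal sketch of normalize, assuming it only rescales non-negative candidate scores into a probability distribution:

import numpy as np

def normalize(scores, eps=1e-12):
    # Hypothetical stand-in for the undefined helper used above.
    scores = np.asarray(scores, dtype=np.float64)
    total = scores.sum()
    if total < eps:  # all-zero scores: fall back to a uniform distribution
        return np.ones_like(scores) / len(scores)
    return scores / total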
Example #22
#!/usr/bin/env python

from __future__ import print_function

from reader import read_data
import numpy as np

with open('data.txt','r') as f:
    data = read_data(f.readlines())

print(data)


sleepiest_guard = max(data.values(), key=lambda g: g.total_sleeping_time)
print('sleepiest_guard:', sleepiest_guard)
#sleepiest_guard: Datum(
#   id=1601,
#   schedule=Schedule(schedule=array([ 1,  2,  3,  4,  4,  4,  6,  6,  6,  6,  6,  5,  5,  6,  7,  7,  6, 6,  6,  7,  9,  8,  7,  8,  9,  9, 10, 10,  9,  9,  9,  9,  9,  9, 11, 10, 10, 11, 11, 10, 11, 12, 12, 12, 12, 13, 14, 13, 12, 11, 11, 10, 10,  9,  9,  8,  8,  3,  3,  0]),
#      max=14,
#      argmax=46),
#   log=[(6, 16), (53, 59), (1, 11), (20, 57), (38, 48), (0, 35), (39, 54), (8, 21), (40, 53), (26, 59), (19, 39), (6, 8), (23, 49), (55, 57), (2, 22), (46, 57), (14, 40), (13, 39), (47, 55), (34, 47), (40, 47), (3, 28), (37, 53), (45, 55), (34, 38), (41, 51), (54, 57), (20, 57), (24, 59)],
#   total_sleeping_time=483,
#   longest_nap=37)

answer = sleepiest_guard.id * sleepiest_guard.schedule.argmax
print('Answer:', answer)
#73646
Example #23
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import normalize
import pylab as pl
import numpy as np
import gradient as gr
import ml
import reader

xs, ys = reader.read_data("prices.txt")
ys = ys / np.linalg.norm(ys)
xs = normalize(xs, axis=0, norm="l1")

x1s = xs[:, 0]
x2s = xs[:, 1]

fig = pl.figure()
ax = fig.add_subplot(111, projection='3d')


# tgt function is y = w0 + w * x + e
fds = ml.folds(xs, ys, 10)

fold = fds[0]
alpha = 0.2
w = gr.gradient_method(fold["train_p"], fold["train_c"], alpha)
print(w)


X = np.arange(0, 0.05, 0.001)
Y = np.arange(0, 0.03, 0.001)
X, Y = np.meshgrid(X, Y)
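
The example is truncated before anything is drawn. A plausible continuation, assuming w holds [w0, w1, w2] for the plane y = w0 + w1*x1 + w2*x2 fitted above (an assumption, not part of the original):

# assumed shape: w = [w0, w1, w2]
Z = w[0] + w[1] * X + w[2] * Y
ax.plot_surface(X, Y, Z, alpha=0.3)  # fitted plane
ax.scatter(x1s, x2s, ys)             # normalized data points
pl.show()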
Example #24
File: main.py  Project: eadm/ML
def quartic_kernel(u):
    return 15. / 16. * (1 - u**2)**2


def quartic_kernel2(u):
    return 3. / 4. * (1 - min(u**2, 1.0))


def minkowski(a, b, p):
    return np.sum(np.abs(a - b)**p)**(1. / p)


kernels = [gaussian_kernel, quartic_kernel, quartic_kernel2]

x, y = reader.read_data('non-parametric.csv')
metric = (lambda __x1, __x2: minkowski(__x1, __x2, 1))

min_mse = float('inf')
min_a = []
for i in range(len(kernels)):
    kernel = kernels[i]
    for k in np.arange(0.05, 4., 0.05):  # np.arange(4, 20):
        xs = np.array(x)  # np.arange(min(x), max(x), 0.01)
        ys = []
        for pt in xs:
            ys.append(kernel_smoothing.smooth(x, y, pt, metric, kernel, k))

        mse = ml.mse(y, ys)
        if mse < min_mse:
            min_mse = mse
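
A quick sanity check of the corrected minkowski helper (p=1 is the Manhattan distance used for metric above, p=2 the Euclidean distance):

import numpy as np

a = np.array([0.0, 0.0])
b = np.array([3.0, 4.0])
print(minkowski(a, b, 1))  # 7.0 (Manhattan)
print(minkowski(a, b, 2))  # 5.0 (Euclidean)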
Example #25
import os

import reader


def train_ngram(data_path, list_file, out_file):
    list_path = os.path.join(data_path, list_file)

    with open(out_file, "w") as f:
        for file_data in reader.read_data(data_path, list_path):
            f.write(" ".join(file_data) + "\n")
Example #26
    args = parser.parse_args()

    if args.checkpoint is not None:
        args.output_dir = os.path.dirname(args.checkpoint)
    try:
        os.mkdir(args.output_dir)
    except FileExistsError:
        assert os.path.isdir(args.output_dir), 'output_dir should be a directory'

    logging.basicConfig(filename=os.path.join(args.output_dir, 'train.log'),
                        format='[%(asctime)s] %(message)s',
                        filemode='w' if args.checkpoint is None else 'a',
                        level=logging.INFO)

    data = read_data()
    kfolds = model_selection.KFold(n_splits=10,
                                   shuffle=True,
                                   random_state=args.seed)
    train_index, val_index = next(kfolds.split(data))
    train_data = [data[i] for i in train_index]
    val_data = [data[i] for i in val_index]
    train_data = list(filter(lambda l: l.length > 0, train_data))
    val_data = filter(lambda l: l.length > 0, val_data)
    val_data = sorted(val_data, key=attrgetter('length'))
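    # Validation examples are sorted by length, presumably so that later
    # batching groups similar lengths and wastes little padding.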

    if args.action is None:
        if args.checkpoint is not None:
            args.action = 'test'
        else:
            args.action = 'train'
Example #27
sys.path.insert(0, '../utils')
import reader

from config import config
config=config()


from model import LangModel

import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--backward', dest='backward', action='store_true', help='train the backward model (default is forward)')
parser.add_argument('-e', '--epoch', type=int, default=config.max_epoch, help="maximum number of epochs to run; default = {}".format(config.max_epoch))
parser.add_argument('-b', '--batch', type=int, default=config.batch_size, help="batch size; default = {}".format(config.batch_size))
args = parser.parse_args()

from utils import *
import numpy as np
import tensorflow as tf

# Define model and restore checkpoint if created

model = LangModel(config.backward_save_path if args.backward else config.forward_save_path)
model.restore()

# Train chosen model

print('Training {} language model'.format('backward' if args.backward else 'forward'))
train_data, train_sequence_length, test_data, test_sequence_length = reader.read_data(config.data_path, config.num_steps, is_backward=args.backward)
model.compile()
model.run(train_data, test_data, args.epoch, args.batch)
Example #28
#!/usr/bin/env python

import reader
import sys
import pypsignifit as pf
import pypsignifit.psignipriors as pfp
import pylab as pl
import numpy as np
import swignifit.swignifit_raw as sfr
import integrate as ig
# import pypsignifit.psigniplot as pp

d, s = reader.read_data(sys.argv[1])
d = np.array(d)
# stimulus_intensities = [0.0,2.0,4.0,6.0,8.0,10.0]
# number_of_correct = [34,32,40,48,50,48]
# number_of_trials  = [50]*len(stimulus_intensities)
# data = zip(stimulus_intensities,number_of_correct,number_of_trials)
# d = np.array ( data )

model = {'nafc': 1, 'sigmoid': "logistic", 'core': 'mw0.1'}
m = 4.0
w = 4.0
l = 0.05
g = 0.02

priors = [
    "Gauss(%f,%f)" % (m, m),
    "Gauss(%f,%f)" % (w, w * 2), "Beta(2,50)", "Beta(1,50)"
]
# priors = (pfp.default_mid(d[:,0])[0],"Gamma(2,4)",pfp.default_lapse(),pfp.default_lapse())
Example #29
from reader import read_data

ulamki = read_data()


def nwd(a: int, b: int) -> int:
    while b > 0:
        a, b = b, a % b

    return a


def wzglednie_pierwsze(a: int, b: int) -> bool:
    return nwd(a, b) == 1


count = 0
for ulamek in ulamki:
    licznik = int(ulamek[0])
    mianownik = int(ulamek[1])

    # a fraction is irreducible when its numerator
    # and denominator are coprime
    if wzglednie_pierwsze(licznik, mianownik):
        count += 1

print(f"{count=}")
Example #30
def main(_):
    if os.path.exists(config.forward_log_path) and config.mode == 'forward':
        os.system('rm ' + config.forward_log_path)
    if os.path.exists(config.backward_log_path) and config.mode == 'backward':
        os.system('rm ' + config.backward_log_path)
    if os.path.exists(config.use_output_path):
        os.system('rm ' + config.use_output_path)
    if os.path.exists(config.use_log_path):
        os.system('rm ' + config.use_log_path)
    if config.mode == 'forward' or config.mode == 'use':
        with tf.name_scope("forward_train"):
            with tf.variable_scope("forward", reuse=None):
                m_forward = PTBModel(is_training=True)
        with tf.name_scope("forward_test"):
            with tf.variable_scope("forward", reuse=True):
                mtest_forward = PTBModel(is_training=False)
        var = tf.trainable_variables()
        var_forward = [x for x in var if x.name.startswith('forward')]
        saver_forward = tf.train.Saver(var_forward, max_to_keep=1)
    if config.mode == 'backward' or config.mode == 'use':
        with tf.name_scope("backward_train"):
            with tf.variable_scope("backward", reuse=None):
                m_backward = PTBModel(is_training=True)
        with tf.name_scope("backward_test"):
            with tf.variable_scope("backward", reuse=True):
                mtest_backward = PTBModel(is_training=False)
        var = tf.trainable_variables()
        var_backward = [x for x in var if x.name.startswith('backward')]
        saver_backward = tf.train.Saver(var_backward, max_to_keep=1)
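    # The forward and backward language models live in separate variable
    # scopes, each with its own Saver, so their checkpoints can be trained
    # and restored independently ('use' mode restores both).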

    init = tf.global_variables_initializer()

    configs = tf.ConfigProto()
    configs.gpu_options.allow_growth = True
    with tf.Session(config=configs) as session:
        session.run(init)
        if config.mode == 'forward':
            #train forward language model
            train_data, test_data = reader.read_data(config.data_path,
                                                     config.num_steps)
            test_mean_old = 15.0

            for epoch in range(config.max_epoch):
                train_ppl_list = []
                test_ppl_list = []
                for i in range(train_data.length // config.batch_size):
                    input, sequence_length, target = train_data(
                        m_forward.batch_size, i)
                    train_perplexity = run_epoch(session,
                                                 m_forward,
                                                 input,
                                                 sequence_length,
                                                 target,
                                                 mode='train')
                    train_ppl_list.append(train_perplexity)
                    print("Epoch:%d, Iter: %d Train NLL: %.3f" %
                          (epoch, i + 1, train_perplexity))
                for i in range(test_data.length // config.batch_size):
                    input, sequence_length, target = test_data(
                        mtest_forward.batch_size, i)
                    test_perplexity = run_epoch(session,
                                                mtest_forward,
                                                input,
                                                sequence_length,
                                                target,
                                                mode='test')
                    test_ppl_list.append(test_perplexity)
                    print("Epoch:%d, Iter: %d Test NLL: %.3f" %
                          (epoch, i + 1, test_perplexity))
                test_mean = np.mean(test_ppl_list)
                if test_mean < test_mean_old:
                    test_mean_old = test_mean
                    saver_forward.save(session, config.forward_save_path)
                write_log(
                    'train ppl:' + str(np.mean(train_ppl_list)) + '\t' +
                    'test ppl:' + str(test_mean), config.forward_log_path)

        if config.mode == 'backward':
            #train backward language model
            train_data, test_data = reader.read_data(config.data_path,
                                                     config.num_steps)
            test_mean_old = 15.0
            for epoch in range(config.max_epoch):
                train_ppl_list = []
                test_ppl_list = []

                for i in range(train_data.length // config.batch_size):
                    input, sequence_length, target = train_data(
                        m_backward.batch_size, i)
                    input, sequence_length, target = reverse_seq(
                        input, sequence_length, target)
                    train_perplexity = run_epoch(session,
                                                 m_backward,
                                                 input,
                                                 sequence_length,
                                                 target,
                                                 mode='train')
                    train_ppl_list.append(train_perplexity)
                    print("Epoch:%d, Iter: %d Train NLL: %.3f" %
                          (epoch, i + 1, train_perplexity))
                for i in range(test_data.length // config.batch_size):
                    input, sequence_length, target = test_data(
                        mtest_backward.batch_size, i)
                    input, sequence_length, target = reverse_seq(
                        input, sequence_length, target)
                    test_perplexity = run_epoch(session,
                                                mtest_backward,
                                                input,
                                                sequence_length,
                                                target,
                                                mode='test')
                    test_ppl_list.append(test_perplexity)
                    print("Epoch:%d, Iter: %d Test NLL: %.3f" %
                          (epoch, i + 1, test_perplexity))
                test_mean = np.mean(test_ppl_list)
                if test_mean < test_mean_old:
                    test_mean_old = test_mean
                    saver_backward.save(session, config.backward_save_path)
                write_log(
                    'train ppl:' + str(np.mean(train_ppl_list)) + '\t' +
                    'test ppl:' + str(test_mean), config.backward_log_path)

        if config.mode == 'use':
            #CGMH sampling for key_gen
            sim = config.sim
            saver_forward.restore(session, config.forward_save_path)
            saver_backward.restore(session, config.backward_save_path)
            config.shuffle = False

            #keyword input
            if config.keyboard_input:
                #input from keyboard if key_input is not empty
                key_input = raw_input('please input a sentence\n')
                if key_input == '':
                    use_data, sta_vec_list = reader.read_data_use(
                        config.use_data_path, config.num_steps)
                else:
                    key_input = key_input.split()
                    key_input = sen2id(key_input)
                    sta_vec = list(np.zeros([config.num_steps - 1]))
                    for i in range(len(key_input)):
                        sta_vec[i] = 1
                    use_data = reader.array_data([key_input], config.num_steps,
                                                 config.dict_size)
            else:
                #load keywords from file
                use_data, sta_vec_list = reader.read_data_use(
                    config.use_data_path, config.num_steps)
            config.batch_size = 1
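
            # CGMH-style Metropolis-Hastings sampling: for each keyword set the
            # loop below repeatedly picks a position `ind` and proposes one of
            # four actions -- 0: replace a word, 1: insert a word, 2: delete a
            # word, 3: keep the sentence -- accepting each proposal with
            # probability `alpha`.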

            for sen_id in range(use_data.length):
                #generate for each sequence of keywords
                if not config.keyboard_input:
                    sta_vec = sta_vec_list[sen_id % (config.num_steps - 1)]

                print(sta_vec)

                input, sequence_length, _ = use_data(1, sen_id)
                input_original = input[0]

                pos = 0
                outputs = []
                output_p = []
                for iter in range(config.sample_time):
                    #ind is the index of the selected word, regardless of the beginning token.
                    #sample config.sample_time times for each set of keywords
                    config.sample_prior = [1, 10.0 / sequence_length[0], 1, 1]
                    if iter % 20 < 10:
                        config.threshold = 0
                    else:
                        config.threshold = 0.5
                    ind = pos % (sequence_length[0])
                    action = choose_action(config.action_prob)
                    print(' '.join(id2sen(input[0])))

                    if sta_vec[ind] == 1 and action in [0, 2]:
                        #skip words that we do not change(original keywords)
                        action = 3

                    #word replacement (action: 0)
                    if action == 0 and ind < sequence_length[0] - 1:
                        prob_old = run_epoch(session,
                                             mtest_forward,
                                             input,
                                             sequence_length,
                                             mode='use')
                        if config.double_LM:
                            input_backward, _, _ = reverse_seq(
                                input, sequence_length, input)
                            prob_old = (prob_old + run_epoch(session,
                                                             mtest_backward,
                                                             input_backward,
                                                             sequence_length,
                                                             mode='use')) * 0.5

                        tem = 1
                        for j in range(sequence_length[0] - 1):
                            tem *= prob_old[0][j][input[0][j + 1]]
                        tem *= prob_old[0][j + 1][config.dict_size + 1]
                        prob_old_prob = tem

                        if sim is not None:
                            similarity_old = similarity(
                                input[0], input_original, sta_vec)
                            prob_old_prob *= similarity_old
                        else:
                            similarity_old = -1
                        input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(
                            input, sequence_length, ind, mode=action)
                        prob_forward = run_epoch(
                            session,
                            mtest_forward,
                            input_forward,
                            sequence_length_forward,
                            mode='use')[0, ind % (sequence_length[0] - 1), :]
                        prob_backward = run_epoch(
                            session,
                            mtest_backward,
                            input_backward,
                            sequence_length_backward,
                            mode='use')[0, sequence_length[0] - 1 - ind %
                                        (sequence_length[0] - 1), :]
                        prob_mul = (prob_forward * prob_backward)
                        input_candidate, sequence_length_candidate = generate_candidate_input(
                            input,
                            sequence_length,
                            ind,
                            prob_mul,
                            config.search_size,
                            mode=action)
                        prob_candidate_pre = run_epoch(
                            session,
                            mtest_forward,
                            input_candidate,
                            sequence_length_candidate,
                            mode='use')
                        if config.double_LM:
                            input_candidate_backward, _, _ = reverse_seq(
                                input_candidate, sequence_length_candidate,
                                input_candidate)
                            prob_candidate_pre = (
                                prob_candidate_pre +
                                run_epoch(session,
                                          mtest_backward,
                                          input_candidate_backward,
                                          sequence_length_candidate,
                                          mode='use')) * 0.5
                        prob_candidate = []
                        for i in range(config.search_size):
                            tem = 1
                            for j in range(sequence_length[0] - 1):
                                tem *= prob_candidate_pre[i][j][
                                    input_candidate[i][j + 1]]
                            tem *= prob_candidate_pre[i][j +
                                                         1][config.dict_size +
                                                            1]
                            prob_candidate.append(tem)

                        prob_candidate = np.array(prob_candidate)
                        if sim is not None:
                            similarity_candidate = similarity_batch(
                                input_candidate, input_original, sta_vec)
                            prob_candidate = prob_candidate * similarity_candidate
                        prob_candidate_norm = normalize(prob_candidate)
                        prob_candidate_ind = sample_from_candidate(
                            prob_candidate_norm)
                        prob_candidate_prob = prob_candidate[
                            prob_candidate_ind]
                        if input_candidate[prob_candidate_ind][
                                ind + 1] < config.dict_size and (
                                    prob_candidate_prob > prob_old_prob *
                                    config.threshold or just_acc() == 0):
                            input = input_candidate[
                                prob_candidate_ind:prob_candidate_ind + 1]
                        pos += 1
                        print('action:0', 1, prob_old_prob,
                              prob_candidate_prob,
                              prob_candidate_norm[prob_candidate_ind],
                              similarity_old)
                        if ' '.join(id2sen(input[0])) not in output_p:
                            outputs.append(
                                [' '.join(id2sen(input[0])), prob_old_prob])

                    #word insertion(action:1)
                    if action == 1:
                        if sequence_length[0] >= config.num_steps:
                            action = 3
                        else:
                            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(
                                input, sequence_length, ind, mode=action)
                            prob_forward = run_epoch(
                                session,
                                mtest_forward,
                                input_forward,
                                sequence_length_forward,
                                mode='use')[0,
                                            ind % (sequence_length[0] - 1), :]
                            prob_backward = run_epoch(
                                session,
                                mtest_backward,
                                input_backward,
                                sequence_length_backward,
                                mode='use')[0, sequence_length[0] - 1 - ind %
                                            (sequence_length[0] - 1), :]
                            prob_mul = (prob_forward * prob_backward)
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input,
                                sequence_length,
                                ind,
                                prob_mul,
                                config.search_size,
                                mode=action)
                            prob_candidate_pre = run_epoch(
                                session,
                                mtest_forward,
                                input_candidate,
                                sequence_length_candidate,
                                mode='use')
                            if config.double_LM:
                                input_candidate_backward, _, _ = reverse_seq(
                                    input_candidate, sequence_length_candidate,
                                    input_candidate)
                                prob_candidate_pre = (
                                    prob_candidate_pre +
                                    run_epoch(session,
                                              mtest_backward,
                                              input_candidate_backward,
                                              sequence_length_candidate,
                                              mode='use')) * 0.5

                            prob_candidate = []
                            for i in range(config.search_size):
                                tem = 1
                                for j in range(sequence_length_candidate[0] -
                                               1):
                                    tem *= prob_candidate_pre[i][j][
                                        input_candidate[i][j + 1]]
                                tem *= prob_candidate_pre[i][j + 1][
                                    config.dict_size + 1]
                                prob_candidate.append(tem)
                            prob_candidate = np.array(
                                prob_candidate) * config.sample_prior[1]
                            if sim is not None:
                                similarity_candidate = similarity_batch(
                                    input_candidate, input_original, sta_vec)
                                prob_candidate = prob_candidate * similarity_candidate
                            prob_candidate_norm = normalize(prob_candidate)

                            prob_candidate_ind = sample_from_candidate(
                                prob_candidate_norm)
                            prob_candidate_prob = prob_candidate[
                                prob_candidate_ind]

                            prob_old = run_epoch(session,
                                                 mtest_forward,
                                                 input,
                                                 sequence_length,
                                                 mode='use')
                            if config.double_LM:
                                input_backward, _, _ = reverse_seq(
                                    input, sequence_length, input)
                                prob_old = (prob_old +
                                            run_epoch(session,
                                                      mtest_backward,
                                                      input_backward,
                                                      sequence_length,
                                                      mode='use')) * 0.5

                            tem = 1
                            for j in range(sequence_length[0] - 1):
                                tem *= prob_old[0][j][input[0][j + 1]]
                            tem *= prob_old[0][j + 1][config.dict_size + 1]

                            prob_old_prob = tem
                            if sim is not None:
                                similarity_old = similarity(
                                    input[0], input_original, sta_vec)
                                prob_old_prob = prob_old_prob * similarity_old
                            else:
                                similarity_old = -1
                            #alpha is acceptance ratio of current proposal
                            alpha = min(
                                1,
                                prob_candidate_prob * config.action_prob[2] /
                                (prob_old_prob * config.action_prob[1] *
                                 prob_candidate_norm[prob_candidate_ind]))
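                            # This is the standard MH ratio
                            # min(1, p(new) q(old|new) / (p(old) q(new|old))):
                            # action_prob[1]/[2] are the insert/delete proposal
                            # probabilities and prob_candidate_norm is the
                            # within-candidate proposal distribution.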
                            print('action:1', alpha, prob_old_prob,
                                  prob_candidate_prob,
                                  prob_candidate_norm[prob_candidate_ind],
                                  similarity_old)
                            if ' '.join(id2sen(input[0])) not in output_p:
                                outputs.append([
                                    ' '.join(id2sen(input[0])), prob_old_prob
                                ])
                            if choose_action([
                                    alpha, 1 - alpha
                            ]) == 0 and input_candidate[prob_candidate_ind][
                                    ind + 1] < config.dict_size and (
                                        prob_candidate_prob > prob_old_prob *
                                        config.threshold or just_acc() == 0):
                                input = input_candidate[
                                    prob_candidate_ind:prob_candidate_ind + 1]
                                sequence_length += 1
                                pos += 2
                                sta_vec.insert(ind, 0.0)
                                del sta_vec[-1]
                            else:
                                action = 3

                    #word deletion (action: 2)
                    if action == 2 and ind < sequence_length[0] - 1:
                        if sequence_length[0] <= 2:
                            action = 3
                        else:

                            prob_old = run_epoch(session,
                                                 mtest_forward,
                                                 input,
                                                 sequence_length,
                                                 mode='use')
                            if config.double_LM:
                                input_backward, _, _ = reverse_seq(
                                    input, sequence_length, input)
                                prob_old = (prob_old +
                                            run_epoch(session,
                                                      mtest_backward,
                                                      input_backward,
                                                      sequence_length,
                                                      mode='use')) * 0.5

                            tem = 1
                            for j in range(sequence_length[0] - 1):
                                tem *= prob_old[0][j][input[0][j + 1]]
                            tem *= prob_old[0][j + 1][config.dict_size + 1]
                            prob_old_prob = tem
                            if sim is not None:
                                similarity_old = similarity(
                                    input[0], input_original, sta_vec)
                                prob_old_prob = prob_old_prob * similarity_old
                            else:
                                similarity_old = -1
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input,
                                sequence_length,
                                ind,
                                None,
                                config.search_size,
                                mode=2)
                            prob_new = run_epoch(session,
                                                 mtest_forward,
                                                 input_candidate,
                                                 sequence_length_candidate,
                                                 mode='use')
                            tem = 1
                            for j in range(sequence_length_candidate[0] - 1):
                                tem *= prob_new[0][j][input_candidate[0][j +
                                                                         1]]
                            tem *= prob_new[0][j + 1][config.dict_size + 1]
                            prob_new_prob = tem
                            if sim is not None:
                                similarity_new = similarity_batch(
                                    input_candidate, input_original, sta_vec)
                                prob_new_prob = prob_new_prob * similarity_new

                            input_forward, input_backward, sequence_length_forward, sequence_length_backward = cut_from_point(
                                input, sequence_length, ind, mode=0)
                            prob_forward = run_epoch(
                                session,
                                mtest_forward,
                                input_forward,
                                sequence_length_forward,
                                mode='use')[0,
                                            ind % (sequence_length[0] - 1), :]
                            prob_backward = run_epoch(
                                session,
                                mtest_backward,
                                input_backward,
                                sequence_length_backward,
                                mode='use')[0, sequence_length[0] - 1 - ind %
                                            (sequence_length[0] - 1), :]
                            prob_mul = (prob_forward * prob_backward)
                            input_candidate, sequence_length_candidate = generate_candidate_input(
                                input,
                                sequence_length,
                                ind,
                                prob_mul,
                                config.search_size,
                                mode=0)
                            prob_candidate_pre = run_epoch(
                                session,
                                mtest_forward,
                                input_candidate,
                                sequence_length_candidate,
                                mode='use')
                            if config.double_LM:
                                input_candidate_backward, _, _ = reverse_seq(
                                    input_candidate, sequence_length_candidate,
                                    input_candidate)
                                prob_candidate_pre = (
                                    prob_candidate_pre +
                                    run_epoch(session,
                                              mtest_backward,
                                              input_candidate_backward,
                                              sequence_length_candidate,
                                              mode='use')) * 0.5

                            prob_candidate = []
                            for i in range(config.search_size):
                                tem = 1
                                for j in range(sequence_length[0] - 1):
                                    tem *= prob_candidate_pre[i][j][
                                        input_candidate[i][j + 1]]
                                tem *= prob_candidate_pre[i][j + 1][
                                    config.dict_size + 1]
                                prob_candidate.append(tem)
                            prob_candidate = np.array(prob_candidate)

                            if sim is not None:
                                similarity_candidate = similarity_batch(
                                    input_candidate, input_original, sta_vec)
                                prob_candidate = prob_candidate * similarity_candidate

                            #alpha is acceptance ratio of current proposal
                            prob_candidate_norm = normalize(prob_candidate)
                            if input[0] in input_candidate:
                                for candidate_ind in range(
                                        len(input_candidate)):
                                    if input[0] in input_candidate[
                                            candidate_ind:candidate_ind + 1]:
                                        break
                                alpha = min(
                                    prob_candidate_norm[candidate_ind] *
                                    prob_new_prob * config.action_prob[1] /
                                    (config.action_prob[2] * prob_old_prob), 1)
                            else:
                                alpha = 0
                            print('action:2', alpha, prob_old_prob,
                                  prob_new_prob,
                                  prob_candidate_norm[candidate_ind],
                                  similarity_old)
                            if ' '.join(id2sen(input[0])) not in output_p:
                                outputs.append([
                                    ' '.join(id2sen(input[0])), prob_old_prob
                                ])
                            if choose_action([
                                    alpha, 1 - alpha
                            ]) == 0 and (prob_new_prob > prob_old_prob *
                                         config.threshold or just_acc() == 0):
                                input = np.concatenate(
                                    [input[:, :ind + 1], input[:, ind + 2:],
                                     input[:, :1] * 0 + config.dict_size + 1],
                                    axis=1)
                                sequence_length -= 1
                                pos += 0
                                del sta_vec[ind]
                                sta_vec.append(0)
                            else:
                                action = 3
                    #skip word (action: 3)
                    if action == 3:
                        #write_log('step:'+str(iter)+'action:3', config.use_log_path)
                        pos += 1
                    print(outputs)
                    if outputs != []:
                        output_p.append(outputs[-1][0])

                #choose output from samples
                for num in range(config.min_length, 0, -1):
                    outputss = [x for x in outputs if len(x[0].split()) >= num]
                    print(num, outputss)
                    if outputss != []:
                        break
                if outputss == []:
                    outputss.append([' '.join(id2sen(input[0])), 1])
                outputss = sorted(outputss, key=lambda x: x[1], reverse=True)
                with open(config.use_output_path, 'a') as g:
                    g.write(outputss[0][0] + '\n')
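
choose_action and just_acc are never defined in any of these excerpts. A minimal sketch of what choose_action plausibly does, offered as an assumption: it samples an index from a discrete distribution, so choose_action([alpha, 1 - alpha]) returns 0 with probability alpha.

import numpy as np

def choose_action(probs):
    # Hypothetical helper: draw an index according to the given probabilities.
    probs = np.asarray(probs, dtype=np.float64)
    return int(np.random.choice(len(probs), p=probs / probs.sum()))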
Example #31
import reader

data = reader.read_data('example.data')
print(data)
Example #32
import os

import reader


def train_ngram(data_path, list_file, out_file):
    list_path = os.path.join(data_path, list_file)

    with open(out_file, "w") as f:
        for file_data in reader.read_data(data_path, list_path):
            f.write(" ".join(file_data) + "\n")