Пример #1
0
    def sample_model(self, args, epoch, idx):
        """Run both generators on one random test pair and save the outputs.

        One file is picked at random from ``testA`` and one from ``testB``
        under ``self.dataset_dir``. The two loaded images are joined along
        axis 2, fed to ``self.real_data`` as a single-element float32 batch,
        and the resulting ``fake_A`` / ``fake_B`` outputs are written below
        ``args.sample_dir`` with ``epoch`` and ``idx`` in the file names.

        Args:
            args: Options object; ``fine_size`` and ``sample_dir`` are read.
            epoch: Epoch number used in the output file names.
            idx: Iteration index used in the output file names.

        Raises:
            IndexError: If either test directory yields no files.
        """
        files_a = glob(os.path.join(self.dataset_dir, 'testA', '*'))
        files_b = glob(os.path.join(self.dataset_dir, 'testB', '*'))

        # Shuffle in place (A first, then B) and use the head of each list.
        np.random.shuffle(files_a)
        np.random.shuffle(files_b)

        image_a, image_b = [
            load_test_data(path, args.fine_size)
            for path in (files_a[0], files_b[0])
        ]

        # Join the pair along axis 2 and add a leading batch axis of size one.
        paired = np.concatenate((image_a, image_b), axis=2)
        batch = np.array([paired]).astype(np.float32)

        fake_A, fake_B = self.sess.run(
            [self.fake_A, self.fake_B],
            feed_dict={self.real_data: batch}
        )

        path_a = './{}/A_{:04d}_{:06d}.jpg'.format(args.sample_dir, epoch, idx)
        save_images(fake_A, [1, 1], path_a)
        path_b = './{}/B_{:04d}_{:06d}.jpg'.format(args.sample_dir, epoch, idx)
        save_images(fake_B, [1, 1], path_b)
Пример #2
0
def load_test():
    """Load the word vectors and the test set, and vectorise every usable text.

    Only the leading 20% of the loaded test rows is used. Texts that are
    empty, or that ``utils.cut`` splits into no words, are skipped, and the
    matching entries of the uid / info-id lists are skipped with them so that
    all four returned lists stay index-aligned.

    Returns:
        tuple: ``(X_test, text_test, uid_list, info_id_list)`` where
        ``X_test[i]`` holds one ``word2vec`` entry per word of
        ``text_test[i]`` (words missing from ``word2vec`` map to its
        ``'_UNK'`` entry), and ``uid_list[i]`` / ``info_id_list[i]`` are the
        ids loaded alongside that text.
    """
    # Load the word vectors.
    with utils.timer('Load word vector'):
        word2vec = tl.files.load_npy_to_any(name='%s/word2vec/w2v_sgns_%s_%s_%s.npy' % (
            config.ModelOutputDir, config.embedding_size, config.corpus_version, datestr))

    # Load the test data and keep only the first 20% of every column.
    with utils.timer('Load test data'):
        test_data, uid_list, info_id_list = utils.load_test_data(test_file)
        test_data = test_data[:int(0.2 * len(test_data))]
        uid_list = uid_list[:int(0.2 * len(uid_list))]
        info_id_list = info_id_list[:int(0.2 * len(info_id_list))]

    with utils.timer('representation for test'):
        X_test = []
        text_test = []
        kept_uids = []
        kept_info_ids = []
        # Assumes the three lists are parallel (one uid and one info id per
        # text) -- TODO confirm against utils.load_test_data.
        for text, uid, info_id in zip(test_data, uid_list, info_id_list):
            if text == '':
                continue
            words = utils.cut(text)
            if len(words) == 0:
                continue
            X_test.append([word2vec.get(w, word2vec['_UNK']) for w in words])
            text_test.append(text)
            # Keep the ids of exactly the rows that were kept above; otherwise
            # the id lists drift out of step with X_test / text_test.
            kept_uids.append(uid)
            kept_info_ids.append(info_id)

    # The embedding table is no longer needed once the texts are vectorised.
    del word2vec
    gc.collect()

    return X_test, text_test, kept_uids, kept_info_ids
Пример #3
0
 def test_decoding(self):
     """Check that each hex sample in rlptest.txt decodes to its expected value.

     Every entry loaded from the fixture is unpacked as ``(expected, sample)``;
     the sample is hex-decoded and passed to ``rlp.decode``.
     """
     for expected, sample in utils.load_test_data("rlptest.txt"):
         raw_bytes = sample.decode('hex')
         decoded = rlp.decode(raw_bytes)
         assert expected == decoded, (
             "RLPDecode mismatch for sample '%s'; expected='%s' - "
             "actual='%s'" % (sample, expected, decoded))
Пример #4
0
 def test_encoding(self):
     """Check that each sample in hexencodetest.txt encodes to its expected hex.

     Every entry loaded from the fixture is unpacked as ``(sample, expected)``;
     the sample goes through ``trie.hexarraykey_to_bin`` and the result is
     hex-encoded before the comparison.
     """
     for sample, expected in utils.load_test_data("hexencodetest.txt"):
         packed = trie.hexarraykey_to_bin(sample)
         encoded = packed.encode('hex')
         assert expected == encoded, (
             "HexEncode mismatch for sample '%s'; expected='%s' - "
             "actual='%s'" % (sample, expected, encoded))
Пример #5
0
def train_knn(args):
    """Fit a k-nearest-neighbours classifier and log its test accuracy to wandb.

    Args:
        args: Options object; ``data_home``, ``k_neighbors`` and ``weights``
            are read.
    """
    train_images, train_labels = load_train_data(args.data_home)
    test_images, test_labels = load_test_data(args.data_home)

    # Flatten every image into a 784-element row.
    train_images = train_images.reshape(-1, 784)
    test_images = test_images.reshape(-1, 784)

    # Record the run configuration.
    wandb.init()
    wandb.config.update({
        "model_type": "knn",
        "n_train": len(train_labels),
        "n_test": len(test_labels),
        "k_neighbors": args.k_neighbors,
        "weights": args.weights
    })

    knn = KNeighborsClassifier(n_neighbors=args.k_neighbors,
                               weights=args.weights,
                               n_jobs=-1)
    print('Fitting', knn)
    knn.fit(train_images, train_labels)
    print('Evaluating', knn)
    accuracy = knn.score(test_images, test_labels)
    print('Test accuracy:', accuracy)

    # The test accuracy is logged a second time under the validation key to
    # simplify comparison with the CNN / other scripts. Two separate log calls
    # are kept on purpose so the number of logged steps does not change.
    for metric_name in ("accuracy", "kmnist_val_acc"):
        wandb.log({metric_name: accuracy})
Пример #6
0
def train():
    """Train the GAN on image/sentence pairs and save the finished model.

    All paths and shapes (``feature_path``, ``imgs_dir``, ``tags_path``,
    ``exp_text_path``, ``inputs_shape``, ``seq_vec_len``, ``output_shape``)
    are read from module-level names.
    """
    # Feature vocabulary and its mapping.
    feature_set = load_feature_set(feature_path)
    feature_map = load_feature_map(feature_path)

    # Training images with their sentences, the latter converted to features.
    images, raw_sents = load_train_data(
        imgs_dir,
        tags_path,
        feature_set,
        imresize_shape=inputs_shape,
        max_data_len=None,
    )
    sent_features = sent2feature(
        raw_sents, feature_map, max_feature_len=seq_vec_len)

    # Example sentences, converted the same way and passed as validation input.
    raw_exp_sents, _ = load_test_data(exp_text_path)
    exp_features = sent2feature(
        raw_exp_sents, feature_map, max_feature_len=seq_vec_len)

    # Summary of what was loaded.
    print('feature map:', feature_map)
    print('train img shape:', images.shape)
    print('train sents shape:', sent_features.shape)
    print('exp sents:', exp_features.shape)

    gan = GAN(inputs_shape,
              seq_vec_len,
              output_shape,
              summary_path='./models/log')

    gan.train(train=[images, sent_features], valid_seqs=exp_features)
    gan.save('./models/finish/finish')
Пример #7
0
def test_gomod_vendor_without_flag(test_env):
    """
    Validate a gomod request for a vendored repository sent without the flag.

    Checks:
    * With strict mode enabled, the request fails with the expected message
    * Otherwise, the request completes properly
    """
    env_data = utils.load_test_data(
        "gomod_packages.yaml")["vendored_without_flag"]
    client = utils.Client(test_env["api_url"], test_env["api_auth_type"],
                          test_env.get("timeout"))

    payload = {
        "repo": env_data["repo"],
        "ref": env_data["ref"],
        "pkg_managers": env_data["pkg_managers"],
    }
    initial_response = client.create_new_request(payload=payload)
    completed_response = client.wait_for_complete_request(initial_response)

    if not test_env.get("strict_mode_enabled"):
        # Without strict mode the request is expected to go through.
        utils.assert_properly_completed_response(completed_response)
    else:
        assert completed_response.status == 200
        assert completed_response.data["state"] == "failed"
        error_msg = (
            'The "gomod-vendor" flag must be set when your repository has vendored dependencies'
        )
        state_reason = completed_response.data["state_reason"]
        assert error_msg in state_reason, (
            f"#{completed_response.id}: Request failed correctly, but with unexpected message: "
            f"{completed_response.data['state_reason']}. Expected message was: {error_msg}"
        )
Пример #8
0
 def test_decoding(self):
     """Check that each hex sample in rlptest.txt decodes to its expected value.

     Every entry loaded from the fixture is unpacked as ``(expected, sample)``;
     the sample is hex-decoded and passed to ``rlp.decode``.
     """
     for expected, sample in utils.load_test_data("rlptest.txt"):
         raw_bytes = sample.decode('hex')
         decoded = rlp.decode(raw_bytes)
         assert expected == decoded, (
             "RLPDecode mismatch for sample '%s'; expected='%s' - "
             "actual='%s'" % (sample, expected, decoded))
Пример #9
0
def run_test(saved_weights,
             model,
             conf,
             x_word_to_idx,
             x_max_len,
             y_idx_to_word,
             num=None):
    """Predict output sequences for the test set and write them to ``test_result``.

    Args:
        saved_weights: Weights location handed to ``model.load_weights``; when
            it is empty the program exits.
        model: Model offering ``load_weights`` and ``predict``.
        conf: Configuration mapping; ``conf['MAX_LEN']`` is forwarded to
            ``load_test_data``.
        x_word_to_idx: Input vocabulary forwarded to ``load_test_data``.
        x_max_len: Length every input sequence is padded to.
        y_idx_to_word: Lookup from predicted index to output word.
        num: When truthy, only the first ``num`` test samples are used.
    """
    # Testing is only possible when there are saved weights.
    if len(saved_weights) == 0:
        print("The network hasn't been trained! Program will exit...")
        sys.exit()

    print(" - loading test data")
    inputs = load_test_data('test', x_word_to_idx, conf['MAX_LEN'])
    if num:
        inputs = inputs[:num]
    inputs = pad_sequences(inputs, maxlen=x_max_len, dtype='int32')

    print(" - loading model")
    model.load_weights(saved_weights)

    print(" - calculating predictions")
    predicted_ids = np.argmax(model.predict(inputs), axis=2)

    print(" - processing")
    sequences = []
    for id_row in predicted_ids:
        # Only indices greater than zero are turned into words.
        words = [y_idx_to_word[index] for index in id_row if index > 0]
        sentence = ' '.join(words)
        print(sentence)
        sequences.append(sentence)
    np.savetxt('test_result', sequences, fmt='%s')
Пример #10
0
 def test_decoding(self):
     """Check that each hex sample in hexencodetest.txt decodes to its expected key.

     Every entry loaded from the fixture is unpacked as ``(expected, sample)``;
     the sample is hex-decoded and passed to ``trie.bin_to_hexarraykey``.
     """
     for expected, sample in utils.load_test_data("hexencodetest.txt"):
         raw_bytes = sample.decode('hex')
         decoded = trie.bin_to_hexarraykey(raw_bytes)
         assert expected == decoded, (
             "HexDecode mismatch for sample '%s'; expected='%s' - "
             "actual='%s'" % (sample, expected, decoded))
Пример #11
0
def predict_pb(model_dir, model_id):
    """Run a frozen graph over the test images and save the predictions.

    The graph is read from ``<model_dir>/<model_id>/<model_id>.pb`` and
    imported under the ``Unet`` name scope. Test images are loaded from
    ``../data``, preprocessed to 128x128 and pushed through the graph in
    batches of 32; a shorter final batch is handled by the same loop, and no
    empty batch is ever fed. The collected predictions are saved as
    ``<model_id>/<model_id>_imgs_mask_test.npy``.

    Args:
        model_dir: Directory holding one sub-directory per model id.
        model_id: Name of the model sub-directory and of the ``.pb`` file.
    """
    imgs_test, _ = utils.load_test_data('../data')
    target_shape = (128, 128)
    imgs_test = utils.preprocess_x(imgs_test, new_shape=target_shape)

    bz = 32
    preds = np.zeros_like(imgs_test, dtype=np.float64)
    model_path = join(model_dir, model_id, '{}.pb'.format(model_id))

    with tf.Session() as sess:
        with tf.gfile.GFile(model_path, "rb") as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        # The session was created without an explicit graph, so it runs the
        # default graph, which is also where import_graph_def puts the nodes.
        tf.import_graph_def(graph_def, name="Unet")

        input_tensor_name = "Unet/input_1:0"
        output_tensor_name = "Unet/conv2d_19_1/Sigmoid:0"

        # Step through the images in strides of `bz`. Slicing past the end
        # simply yields the shorter trailing batch, so inputs smaller than
        # one batch and exact multiples of the batch size both work.
        n_images = imgs_test.shape[0]
        for start in range(0, n_images, bz):
            stop = start + bz
            preds[start:stop] = sess.run(
                output_tensor_name,
                feed_dict={input_tensor_name: imgs_test[start:stop]})

    print(preds.shape)
    # NOTE(review): the output path is relative to `model_id` only, not to
    # `model_dir` -- confirm this is the intended location.
    np.save(join(model_id, '{}_imgs_mask_test.npy'.format(model_id)), preds)
Пример #12
0
class TestShowComments(object):
    """Tests for the ``userinfo.show_comments`` view with the comments URI mocked."""

    # URI of user 1's comments resource; every test mocks GET requests to it.
    comments_uri = mappers.resource_uri(User(id=1), 'comments')
    # Canned payload: the 'comments_with_previews' entry of the fixture file.
    comments = utils.load_test_data(
        'show_comments.json')['comments_with_previews']

    def test_no_user(self, client):
        """The page still answers 200 when the comments URI returns 404."""
        with requests_mock.Mocker() as backend:
            backend.get(self.comments_uri,
                        status_code=404,
                        json=utils.error_response('User', 1))

            page_url = url_for('userinfo.show_comments', user_id=1)
            page = client.get(page_url)
            assert page.status_code == 200

    def test_user_no_comments(self, client):
        """The page answers 200 when the comments URI returns an empty list."""
        with requests_mock.Mocker() as backend:
            backend.get(self.comments_uri, json={'data': []})
            page_url = url_for('userinfo.show_comments', user_id=1)
            page = client.get(page_url)
            assert page.status_code == 200

    def test_user_with_comments(self, client):
        """The page answers 200 when the comments URI returns the fixture payload."""
        with requests_mock.Mocker() as backend:
            backend.get(self.comments_uri, json=self.comments)
            page_url = url_for('userinfo.show_comments', user_id=1)
            page = client.get(page_url)
            assert page.status_code == 200
Пример #13
0
def test_packages(env_package, env_name, test_env, tmpdir):
    """
    Validate a package request described by the env_package / env_name parameters.

    Process:
    Send a new request to the Cachito API
    Wait for the request to complete

    Checks:
    * The response matches the expected response data
    * The downloaded and extracted archive contains the expected files
    * The content manifest has the expected image contents
    """
    env_data = utils.load_test_data(f"{env_package}.yaml")[env_name]
    client = utils.Client(test_env["api_url"], test_env["api_auth_type"],
                          test_env.get("timeout"))

    payload = {
        "repo": env_data["repo"],
        "ref": env_data["ref"],
        "pkg_managers": env_data.get("pkg_managers", []),
        "flags": env_data.get("flags", []),
    }
    # The implicit gomod case sends no "pkg_managers" key at all.
    if env_name == "implicit_gomod":
        del payload["pkg_managers"]

    initial_response = client.create_new_request(payload=payload)
    completed_response = client.wait_for_complete_request(initial_response)
    utils.assert_elements_from_response(completed_response.data,
                                        env_data["response_expectations"])

    request_id = completed_response.id
    client.download_and_extract_archive(request_id, tmpdir)
    source_path = tmpdir.join(f"download_{request_id!s}")
    utils.assert_expected_files(source_path, env_data["expected_files"],
                                tmpdir)

    purl = env_data.get("purl", "")
    deps_purls = [{"purl": x} for x in env_data.get("dep_purls", [])]
    source_purls = [{"purl": x} for x in env_data.get("source_purls", [])]

    # A top-level purl means the manifest entry is assembled here; otherwise
    # the test data must supply the full "image_contents" itself.
    image_contents = ([{
        "dependencies": deps_purls,
        "purl": purl,
        "sources": source_purls
    }] if purl else env_data["image_contents"])
    utils.assert_content_manifest(client, request_id, image_contents)
Пример #14
0
def main(args):
    """Train the digit-recognition network, or run it on a test file.

    When ``args.train`` is false, the model is loaded from ``args.model_file``,
    applied to ``args.test_file`` via ``only_test`` with ``args.result_file``,
    and the function returns. Otherwise the network is trained with SGD for
    ``args.epoch`` epochs; every ``args.iter_cnt`` iterations it is evaluated
    on the dev set and saved whenever the accuracy improves.

    Args:
        args: Options object; reads ``lr``, ``train``, ``model_file``,
            ``test_file``, ``result_file``, ``train_file``, ``dev_file``,
            ``N``, ``M``, ``epoch``, ``batch_size`` and ``iter_cnt``.
    """

    def as_network_input(samples):
        # Wrap raw samples as a float tensor resized to (count, 1, N, M).
        tensor = Variable(torch.FloatTensor(samples))
        return tensor.resize(tensor.size()[0], 1, args.N, args.M)

    dNet = net.DigitRecNet()
    optimizer = optim.SGD(dNet.parameters(), lr=args.lr, momentum=0.5)
    criterion = torch.nn.NLLLoss()

    if not args.train:
        logging.info('-' * 50)
        logging.info('Start testing ... ')
        # NOTE(review): the model is loaded with 'BestModel' here but saved
        # with 'Best' below -- confirm the two helpers agree on the name.
        load_model(dNet, args.model_file, 'BestModel')
        logging.info('finish load model: %s' % args.model_file)
        test_x = utils.load_test_data(args.test_file, args.N, args.M)
        logging.info('Load test : %d' % len(test_x))
        only_test(dNet, as_network_input(test_x), args.result_file)
        return

    train_x, train_y = utils.load_data(args.train_file, args.N, args.M)
    dev_x, dev_y = utils.load_data(args.dev_file, args.N, args.M)
    logging.info('-' * 50)
    logging.info('Load train : %d, Load dev : %d' % (len(train_x), len(dev_x)))

    logging.info('-' * 50)
    logging.info('Start training ... ')

    dev_inputs = as_network_input(dev_x)
    dev_targets = Variable(torch.LongTensor(dev_y))

    best_accuracy = 0
    for epoch_id in range(args.epoch):
        logging.info('Epoch : %d' % epoch_id)

        batches = utils.random_data((train_x, train_y), args.batch_size)
        for it, (batch_x, batch_y) in enumerate(batches):
            inputs = as_network_input(batch_x)
            targets = Variable(torch.LongTensor(batch_y))
            assert inputs.size()[0] == targets.size()[0]

            optimizer.zero_grad()
            loss = criterion(dNet(inputs), targets)
            loss.backward()
            optimizer.step()

            logging.info('Iteration (%d) loss : %.6f' % (it, loss))

            # Evaluate on the dev set only every `iter_cnt` iterations.
            if it % args.iter_cnt != 0:
                continue
            tmp_accuracy = test(dNet, dev_inputs, dev_targets)
            if tmp_accuracy > best_accuracy:
                best_accuracy = tmp_accuracy
                save_model(dNet, epoch_id, args.model_file, 'Best')
            logging.info(
                "Epoch : %d, Accuarcy : %.2f%%, Best Accuatcy : %.2f%%" %
                (epoch_id, tmp_accuracy, best_accuracy))
Пример #15
0
class TestShowUsers(object):
    """Tests for the ``userinfo.show_users`` view with the users URI mocked."""

    # Collection URI of the users resource; the test mocks GET requests to it.
    users_uri = mappers.collection_uri(User())
    # Fixture content loaded from show_users.json.
    test_data = utils.load_test_data('show_users.json')

    def test_no_users(self, client):
        """The page answers 200 when the users URI returns an empty list."""
        with requests_mock.Mocker() as backend:
            backend.get(self.users_uri, json={'data': []})
            page_url = url_for('userinfo.show_users')
            page = client.get(page_url)
            assert page.status_code == 200
Пример #16
0
def conv_knrm_api(qpool, logdir, dataset_path, train_id, parameter):
    """Preprocess the data, then train and save a MatchZoo ConvKNRM ranking model.

    While the preprocessor is fitted, ``sys.stderr`` is pointed at ``logdir``;
    while the model is trained, ``sys.stdout`` is pointed at ``qpool``. Both
    streams are restored even if the wrapped step raises. The fitted
    preprocessor and the trained model are saved below
    ``ROOT_PATH + 'matchzoo_temp_files/'``.

    Args:
        qpool: Stream-like object receiving stdout during training; its
            ``set_trainid`` is called with ``train_id`` first.
        logdir: Stream-like object receiving stderr during preprocessing; its
            ``set_preprocess_id`` is called with ``train_id`` first.
        dataset_path: Not used by this function.
        train_id: Identifier used to load the data and to name the saved
            preprocessor, log and model files (concatenated as a string).
        parameter: Mapping; reads ``existing_dataset``, ``task``, ``filters``,
            ``max_ngram``, ``kernel_num`` and ``epochs``.
    """
    keras.backend.clear_session()
    # Load the data and create the preprocessor object.
    train_pack = load_train_data(train_id, parameter['existing_dataset'], parameter['task'])
    predict_pack = load_test_data(train_id, parameter['existing_dataset'], parameter['task'])
    preprocessor = mz.preprocessors.BasicPreprocessor(fixed_length_left=10, fixed_length_right=100,
                                                      remove_stop_words=False)
    # Redirect stderr to the log object while the preprocessor is fitted, and
    # always restore it afterwards so a failure cannot leave it hijacked.
    logdir.set_preprocess_id(train_id)
    err_old = sys.stderr
    sys.stderr = logdir
    try:
        train_pack_processed = preprocessor.fit_transform(train_pack)
    finally:
        sys.stderr = err_old
    preprocessor.save(ROOT_PATH + 'matchzoo_temp_files/preprocessors/' + train_id + '.conv_knrm_preprocessor')
    predict_pack_processed = preprocessor.transform(predict_pack)
    with open(ROOT_PATH + 'matchzoo_temp_files/logger/' + train_id + '.preprocess_log', 'a') as f:
        f.write('Preprocess finished!')

    ranking_task = mz.tasks.Ranking(loss=mz.losses.RankHingeLoss())
    ranking_task.metrics = [
        mz.metrics.NormalizedDiscountedCumulativeGain(k=3),
        mz.metrics.NormalizedDiscountedCumulativeGain(k=5),
        mz.metrics.MeanAveragePrecision()
    ]

    model = mz.models.ConvKNRM()
    model.params['input_shapes'] = preprocessor.context['input_shapes']
    model.params['task'] = ranking_task
    model.params['embedding_input_dim'] = preprocessor.context['vocab_size']
    # Fixed at 100, the same dimension as the GloVe embedding loaded below;
    # parameter['embedding_output_dim'] is not used.
    model.params['embedding_output_dim'] = 100
    model.params['embedding_trainable'] = True
    model.params['filters'] = parameter['filters']
    model.params['conv_activation_func'] = 'tanh'
    model.params['max_ngram'] = parameter['max_ngram']
    model.params['use_crossmatch'] = True
    model.params['kernel_num'] = parameter['kernel_num']
    model.params['sigma'] = 0.1
    model.params['exact_sigma'] = 0.001
    model.params['optimizer'] = 'adadelta'
    model.guess_and_fill_missing_params()
    model.build()
    model.compile()
    model.backend.summary()

    glove_embedding = mz.datasets.embeddings.load_glove_embedding(dimension=100)
    embedding_matrix = glove_embedding.build_matrix(preprocessor.context['vocab_unit'].state['term_index'])
    model.load_embedding_matrix(embedding_matrix)

    pred_x, pred_y = predict_pack_processed[:].unpack()
    evaluate = mz.callbacks.EvaluateAllMetrics(model, x=pred_x, y=pred_y, batch_size=len(pred_y))
    train_generator = mz.PairDataGenerator(train_pack_processed, num_dup=2, num_neg=1, batch_size=20)

    # Redirect stdout into the log object for the duration of training, and
    # always restore it afterwards.
    qpool.set_trainid(train_id)
    old = sys.stdout
    sys.stdout = qpool
    try:
        model.fit_generator(train_generator, epochs=parameter['epochs'], callbacks=[evaluate], workers=5, use_multiprocessing=False)
    finally:
        sys.stdout = old
    model.save(ROOT_PATH + 'matchzoo_temp_files/models/' + train_id + '.conv_knrm_model')
Пример #17
0
 def test(self):
     """Export the test data to LMDB, run the network and save the argmax labels.

     The index of the largest value of every output row returned by
     ``__get_predicted_output`` is written to ``results3.csv``.
     """
     test_data, _ = utils.load_test_data()
     utils.save_data_as_lmdb(const.LMDB_TEST_DATA_PATH, test_data, True)
     result = self.__get_predicted_output('alexnet_result_2.prototxt', 'cifar3_3_iter_100000.caffemodel.h5')

     # One integer label per output row.
     res = np.zeros(len(result), dtype=int)
     res[:] = [np.argmax(result[i]) for i in range(len(result))]
     np.savetxt("results3.csv", res.astype(dtype=int))
Пример #18
0
 def test(self):
     """Run the network and save the argmax label of every output row.

     The index of the largest value of every output row returned by
     ``__get_predicted_output`` is written to ``results5.csv``.
     """
     # The test data is still loaded, but its LMDB export is disabled here.
     test_data, _ = utils.load_test_data()
     result = self.__get_predicted_output('alexnet_result_5.prototxt', 'cifar3_5_iter_200000.caffemodel.h5')

     # One integer label per output row.
     res = np.zeros(len(result), dtype=int)
     res[:] = [np.argmax(result[i]) for i in range(len(result))]
     np.savetxt("results5.csv", res.astype(dtype=int))
Пример #19
0
 def test(self):
     """Export the test data to HDF5, run the network and save the argmax labels.

     The index of the largest value of every output row returned by
     ``__get_predicted_output`` is written to ``alexnet/results.csv``.
     """
     test_data, _ = utils.load_test_data()
     utils.save_data_as_hdf5(const.HDF5_RESULT_DATA_PATH, test_data, True)
     result = self.__get_predicted_output(const.ALEXNET_RESULT, 'alexnet/cifar3_1_iter_40000.caffemodel.h5')

     # One integer label per output row.
     res = np.zeros(len(result), dtype=int)
     res[:] = [np.argmax(result[i]) for i in range(len(result))]
     np.savetxt("alexnet/results.csv", res.astype(dtype=int))
Пример #20
0
    def test(self, args):
        """Test cyclegan.

        Translates every file of the test folder selected by
        ``args.which_direction`` (``testA`` for ``'AtoB'``, ``testB`` for
        ``'BtoA'``), saves each output image into ``args.test_dir`` and
        writes an HTML index there that lists name, input and output.

        Args:
            args: Options object; reads ``which_direction``,
                ``checkpoint_dir``, ``test_dir`` and ``fine_size``.

        Raises:
            Exception: If ``args.which_direction`` is neither ``'AtoB'`` nor
                ``'BtoA'``.
        """
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

        # Source folder for each supported translation direction.
        test_folders = {'AtoB': 'testA', 'BtoA': 'testB'}
        if args.which_direction not in test_folders:
            raise Exception('--which_direction must be AtoB or BtoA')
        sample_files = glob(os.path.join(
            '.',
            'datasets',
            self.dataset_dir,
            test_folders[args.which_direction],
            '*.*'
        ))

        # A failed load is only reported; the run continues with whatever
        # variable values the session currently holds.
        if self.load(args.checkpoint_dir):
            print(" [*] Checkpoint Load SUCCESS")
        else:
            print(" [!] Checkpoint Load failed...")

        out_var, in_var = (self.testB, self.test_A) if args.which_direction == 'AtoB' else (
            self.testA, self.test_B)

        def index_href(path):
            # Absolute paths are used as-is; relative ones get a '..' prefix.
            return path if os.path.isabs(path) else ('..' + os.path.sep + path)

        # Write html for visual comparison. The context manager closes the
        # file even when processing an image fails part-way through.
        index_path = os.path.join(args.test_dir, '{0}_index.html'.format(args.which_direction))
        with open(index_path, "w") as index:
            index.write("<html><body><table><tr>")
            index.write("<th>name</th><th>input</th><th>output</th></tr>")

            for sample_file in sample_files:
                print('Processing image: ' + sample_file)
                sample_image = [load_test_data(sample_file, args.fine_size)]
                sample_image = np.array(sample_image).astype(np.float32)
                image_path = os.path.join(args.test_dir,
                                          '{0}_{1}'.format(args.which_direction, os.path.basename(sample_file)))
                fake_img = self.sess.run(out_var, feed_dict={in_var: sample_image})
                save_images(fake_img, [1, 1], image_path)

                # Each image gets its own properly opened and closed row.
                index.write("<tr><td>%s</td>" % os.path.basename(image_path))
                index.write("<td><img src='%s'></td>" % index_href(sample_file))
                index.write("<td><img src='%s'></td>" % index_href(image_path))
                index.write("</tr>")

            index.write("</table></body></html>")
    def sample_model(self, sample_dir, counter):
        """Log the generator's recovery SNR on one random training and one random test sample.

        For each of the two sets a random index is drawn, the partial data is
        passed through ``self.G``, and the generator output multiplied by the
        mask is added to the partial data. The SNR against the full data is
        written to ``self.writer`` at step ``counter`` and printed.

        Args:
            sample_dir: Not used by this method.
            counter: Step value passed to ``self.writer.add_scalar``.
        """
        # ---- training sample ----
        train_index = int(np.random.randint(0, self.train_set_size))
        train_mask = 1.0 - self.file_training_mask["mask"][0, :, :, 0]
        train_mask = torch.from_numpy(train_mask).to(self.device)

        train_batch = load_train_data(
            train_index,
            is_testing=True,
            batch_size=1,
            fileA=self.file_trainA,
            fileB=self.file_trainB,
            dataset="train_dataset",
            device=self.device,
        )

        # Along axis 1, the first two entries are the full data and the
        # remaining ones the partial data.
        train_full = train_batch[:, :2, ...]
        train_partial = train_batch[:, 2:, ...]

        train_pred = train_partial + self.G(train_partial) * train_mask

        SNR = self.signal_to_noise(train_full, train_pred)
        self.writer.add_scalar('training SNR', SNR, counter)
        print("Recovery SNR for real part (training data): %4.4f" % SNR)

        # ---- test sample ----
        test_index = int(np.random.randint(0, self.test_set_size))
        test_mask = 1.0 - self.file_mask["mask"][0, :, :, 0]
        test_mask = torch.from_numpy(test_mask).to(self.device)

        # Partial data is read from file_testB, full data from file_testA.
        test_partial = load_test_data(test_index,
                                      filetest=self.file_testB,
                                      dataset="test_dataset",
                                      device=self.device)
        test_full = load_test_data(test_index,
                                   filetest=self.file_testA,
                                   dataset="test_dataset",
                                   device=self.device)

        test_pred = test_partial + self.G(test_partial) * test_mask

        SNR = self.signal_to_noise(test_full, test_pred)
        self.writer.add_scalar('testing SNR', SNR, counter)
        print("Recovery SNR for real part (testing data): %4.4f" % SNR)
Пример #22
0
def test(threshold, model_name='model'):
    """Restore a checkpoint, score the whole test set and run inference on the scores.

    Args:
        threshold: Value forwarded to ``utils.inference``.
        model_name: Checkpoint name inside ``checkpoint_dir`` to restore.
    """
    print("Loading data...")
    test_data = utils.load_test_data(test_file, me, ms, mr)
    test_example_num = len(test_data["input_ids"])
    print("Done.")

    # Each name is both an attribute of the model (used as the feed key) and
    # a key of test_data (the value that is fed).
    feature_names = (
        "input_ids",
        "input_mask",
        "segment_ids",
        "entity_mask",
        "entity_types",
        "sentence_mask",
        "attention_mask",
        "relation_mask",
        "head_mask",
        "tail_mask",
    )

    with tf.Session() as sess:
        model = Model(
            max_entity_num=me,
            max_sentence_num=ms,
            max_relation_num=mr,
            max_seq_length=FLAGS.max_seq_length,
            class_num=len(rel2id),
            entity_types=len(ner2id),
            bert_config=bert_config,
            hidden_size=FLAGS.hidden_size,
            hidden_layers=FLAGS.hidden_layers,
            attention_heads=FLAGS.attention_heads,
            intermediate_size=FLAGS.intermediate_size,
            hidden_dropout_prob=bert_config.hidden_dropout_prob,
            attention_probs_dropout_prob=(
                bert_config.attention_probs_dropout_prob),
            graph_hidden_dropout_prob=FLAGS.graph_hidden_dropout_prob,
            graph_attention_probs_dropout_prob=(
                FLAGS.graph_attention_probs_dropout_prob),
        )

        saver = tf.train.Saver()
        saver.restore(sess, os.path.join(checkpoint_dir, model_name))

        batch_logits = []
        test_index = []
        batches = utils.batch_iter(test_example_num, FLAGS.batch_size, False)
        for batch_index in tqdm(batches):
            feed_dict = {
                getattr(model, name): test_data[name][batch_index]
                for name in feature_names
            }
            feed_dict[model.is_training] = False
            batch_logits.append(sess.run(model.sigmoid, feed_dict))
            # Remember the example indices in the order they were scored.
            test_index.extend(batch_index)
        test_logits = np.concatenate(batch_logits, axis=0)

    utils.inference(test_logits, test_data, test_index, threshold)
Пример #23
0
    def test_loadData(self):
        """Check the column and row counts of the loaded training and test data."""
        test_data = load_test_data()
        training_data = load_training_data()

        # Both data sets must have seven columns (training is checked first).
        for dataset in (training_data, test_data):
            self.assertEqual(len(dataset), 7, msg='Column number incorrect')

        # Row counts, checked on one column of each data set.
        train_rows = len(training_data['t'])
        self.assertEqual(train_rows, 956, msg='Row number incorrect')
        test_rows = len(test_data['p'])
        self.assertEqual(test_rows, 506, msg='Row number incorrect')
Пример #24
0
def cdssm_api(qpool, logdir, dataset_path, train_id, parameter):
    """Preprocess the data, then train and save a MatchZoo CDSSM ranking model.

    While the preprocessor is fitted, ``sys.stderr`` is pointed at ``logdir``;
    while the model is trained, ``sys.stdout`` is pointed at ``qpool``. Both
    streams are restored even if the wrapped step raises. The fitted
    preprocessor and the trained model are saved below
    ``ROOT_PATH + 'matchzoo_temp_files/'``.

    Args:
        qpool: Stream-like object receiving stdout during training; its
            ``set_trainid`` is called with ``train_id`` first.
        logdir: Stream-like object receiving stderr during preprocessing; its
            ``set_preprocess_id`` is called with ``train_id`` first.
        dataset_path: Not used by this function.
        train_id: Identifier used to load the data and to name the saved
            preprocessor, log and model files (concatenated as a string).
        parameter: Mapping; reads ``existing_dataset``, ``task``, ``epochs``
            and the model hyper-parameters assigned below.
    """
    keras.backend.clear_session()
    # Load the data and create the preprocessor object.
    train_pack = load_train_data(train_id, parameter['existing_dataset'], parameter['task'])
    predict_pack = load_test_data(train_id, parameter['existing_dataset'], parameter['task'])
    preprocessor = mz.preprocessors.CDSSMPreprocessor()
    # Redirect stderr to the log object while the preprocessor is fitted, and
    # always restore it afterwards so a failure cannot leave it hijacked.
    logdir.set_preprocess_id(train_id)
    err_old = sys.stderr
    sys.stderr = logdir
    try:
        train_pack_processed = preprocessor.fit_transform(train_pack)
    finally:
        sys.stderr = err_old
    preprocessor.save(ROOT_PATH + 'matchzoo_temp_files/preprocessors/' + train_id + '.cdssm_preprocessor')
    predict_pack_processed = preprocessor.transform(predict_pack)
    with open(ROOT_PATH + 'matchzoo_temp_files/logger/' + train_id + '.preprocess_log', 'a') as f:
        f.write('Preprocess finished!')

    ranking_task = mz.tasks.Ranking(loss=mz.losses.RankCrossEntropyLoss(num_neg=4))
    ranking_task.metrics = [
        mz.metrics.NormalizedDiscountedCumulativeGain(k=3),
        mz.metrics.NormalizedDiscountedCumulativeGain(k=5),
        mz.metrics.MeanAveragePrecision()
    ]

    model = mz.models.CDSSM()
    model.params['input_shapes'] = preprocessor.context['input_shapes']
    model.params['task'] = ranking_task
    # Hyper-parameters copied verbatim from the caller's mapping.
    for key in ('filters', 'kernel_size', 'strides', 'padding',
                'conv_activation_func', 'w_initializer', 'b_initializer',
                'mlp_num_layers', 'mlp_num_units', 'mlp_num_fan_out',
                'mlp_activation_func'):
        model.params[key] = parameter[key]
    model.params['dropout_rate'] = 0.8
    model.params['optimizer'] = 'adadelta'
    model.guess_and_fill_missing_params()
    model.build()
    model.compile()
    model.backend.summary()

    pred_x, pred_y = predict_pack_processed[:].unpack()
    # NOTE(review): the batch size is taken from len(pred_x) here, while the
    # sibling conv_knrm_api uses len(pred_y) -- confirm which is intended.
    evaluate = mz.callbacks.EvaluateAllMetrics(model, x=pred_x, y=pred_y, batch_size=len(pred_x))
    train_generator = mz.PairDataGenerator(train_pack_processed, num_dup=1, num_neg=4, batch_size=64, shuffle=True)

    # Redirect stdout into the log object for the duration of training, and
    # always restore it afterwards.
    qpool.set_trainid(train_id)
    old = sys.stdout
    sys.stdout = qpool
    try:
        model.fit_generator(train_generator, epochs=parameter['epochs'], callbacks=[evaluate], workers=5, use_multiprocessing=False)
    finally:
        sys.stdout = old
    model.save(ROOT_PATH + 'matchzoo_temp_files/models/' + train_id + '.cdssm_model')
Пример #25
0
def test_model(classifier, date, num_epochs):
    """ Load the test set and run the given classifier's test routine on it.

    Texts are loaded only when the classifier's model type is not
    ``ModelType.image_only``.

    :param classifier: object providing ``model_type`` and ``test``
    :param date: forwarded unchanged to ``classifier.test``
    :param num_epochs: forwarded unchanged to ``classifier.test``
    :return: None
    """
    needs_texts = classifier.model_type != ModelType.image_only
    test_labels, test_ids, test_texts = load_test_data(load_texts=needs_texts)
    classifier.test(date, num_epochs, test_ids, test_labels, test_texts)
Пример #26
0
 def setup_method_fixture(self, test_env):
     """Load the cached-package test data and optionally create a local repo.

     When the test repo entry has ``use_local`` set, a local repository is
     created from its ``ssh_url``, the entry's ``ssh_url`` is replaced by the
     resulting path, and that path is recorded in ``self.directories``.
     """
     self.directories = []
     self.env_data = utils.load_test_data("cached_dependencies.yaml")["cached_package"]

     repo_config = self.env_data["test_repo"]
     self.git_user = repo_config.get("git_user")
     self.git_email = repo_config.get("git_email")

     if repo_config.get("use_local"):
         local_repo = create_local_repository(repo_config["ssh_url"])
         repo_config["ssh_url"] = local_repo
         # Remember the directory so it can be cleaned up later.
         self.directories.append(local_repo)
Пример #27
0
def build_login_data():
    """Build the list of login cases stored in ``login.json``.

    Each entry of the ``test_login`` section becomes a tuple
    ``(username, pwd, code, expect)``; keys missing from an entry yield
    ``None`` in the corresponding position.

    :return: list of 4-tuples, one per entry in ``test_login``
    """
    # Read the raw cases from the JSON test-data file
    login_cases = utils.load_test_data("login.json").get("test_login")
    # Turn every case into a flat tuple
    test_list = [
        (case.get("username"), case.get("pwd"),
         case.get("code"), case.get("expect"))
        for case in login_cases
    ]
    logging.info(test_list)
    return test_list
Пример #28
0
    def __init__(self, root='.', train=True, transform=None, target_transform=None, download=False):
        """Load either the training or the test split found under ``root``.

        :param root: dataset directory; ``~`` is expanded
        :param train: load the training split when true, the test split otherwise
        :param transform: stored on the instance as ``self.transform``
        :param target_transform: stored on the instance as ``self.target_transform``
        :param download: accepted but not used by this constructor
        """
        self.root = os.path.expanduser(root)
        self.train = train  # training set or test set
        self.transform = transform
        self.target_transform = target_transform

        # Labels are kept as a LongTensor for both splits.
        if not self.train:
            data, labels = load_test_data(self.root)
            self.test_data = data
            self.test_labels = torch.LongTensor(labels)
        else:
            data, labels = load_train_data(self.root)
            self.train_data = data
            self.train_labels = torch.LongTensor(labels)
Пример #29
0
 def test(self):
     """Predict a class index for every output row and save them to ``results6.csv``.

     Predictions are obtained from ``self.__get_predicted_output`` using the
     ``alexnet_result_6.prototxt`` definition and the
     ``cifar3_6_iter_120000.caffemodel.h5`` weights. For each entry of the
     result the index of its largest value (``np.argmax``) is stored, and
     the resulting integer vector is written with ``np.savetxt``.

     :return: None
     """
     # The loaded test data is not used below; the call is kept so that any
     # loading side effects and the two-value return contract are preserved.
     test_data, __ = utils.load_test_data()
     result = self.__get_predicted_output(
         'alexnet_result_6.prototxt', 'cifar3_6_iter_120000.caffemodel.h5')
     res = np.zeros(len(result), dtype=int)
     # range() works on both Python 2 and 3; xrange() raises NameError on 3.
     for i in range(len(result)):
         res[i] = np.argmax(result[i])
     np.savetxt("results6.csv", res.astype(dtype=int))
Пример #30
0
def nfe(args):
    """Record per-sample ``model.nfe`` values on the test set for a grid of settings.

    For every ``(tol, t1)`` pair in ``itertools.product(args.tol, args.t1)``
    the model's ``odeblock`` is configured with those values, every test
    sample is classified individually (batch size 1) and the true label, the
    predicted label and ``model.nfe(reset=True)`` are collected. Results are
    accumulated in ``nfe.csv.gz`` inside the run directory; the file is
    rewritten after each pair, and pairs already present in it are skipped.

    NFE presumably stands for "number of function evaluations" of the ODE
    solver -- confirm against the model implementation.

    :param args: namespace providing ``run`` (run directory), ``force``
        (ignore previously saved results), ``device``, ``tol`` and ``t1``
        (iterables of values to sweep)
    :return: None
    """
    run = Experiment.from_dir(args.run, main='model')
    print(run)
    results_file = run.path_to('nfe.csv.gz')
    best_ckpt_file = run.ckpt('best')

    results = pd.DataFrame()
    # Reuse saved results only if they are at least as recent as the best
    # checkpoint and the caller did not ask for a forced recomputation.
    if (os.path.exists(results_file)
            and os.path.getctime(results_file) >= os.path.getctime(best_ckpt_file)
            and not args.force):
        # t1 is rounded so that the equality test in the skip check below
        # matches values that went through a CSV round trip.
        results = pd.read_csv(results_file,
                              float_precision='round_trip').round({'t1': 2})

    test_data = load_test_data(run)
    test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

    model = load_model(run)
    model = model.to(args.device)
    model.eval()

    def _nfe(test_loader, model, t1, tol, args):
        """Run the model over ``test_loader`` with the given ``t1``/``tol``.

        Returns a dict of three equally long lists: ``y_true``, ``y_pred``
        and ``nfe`` (one entry per test sample).
        """
        model.odeblock.t1 = t1
        model.odeblock.tol = tol

        y_true = []
        y_pred = []
        nfes = []

        for x, y in tqdm(test_loader):
            y_true.append(y.item())
            y_pred.append(model(x.to(args.device)).argmax(dim=1).item())
            # reset=True so that the next sample is counted separately.
            nfes.append(model.nfe(reset=True))

        return {'y_true': y_true, 'y_pred': y_pred, 'nfe': nfes}

    progress = tqdm(itertools.product(args.tol, args.t1))
    for tol, t1 in progress:
        already_done = (
            't1' in results.columns and 'tol' in results.columns
            and ((results.t1 == t1) & (results.tol == tol)).any())
        if already_done:
            print(f'Skipping tol={tol} t1={t1} ...')
            continue

        progress.set_postfix({'tol': tol, 't1': t1})
        result = _nfe(test_loader, model, t1, tol, args)
        result = pd.DataFrame(result)
        result['t1'] = t1
        result['tol'] = tol
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.
        results = pd.concat([results, result], ignore_index=True)
        # Persist after every pair so an interrupted sweep can be resumed.
        results.to_csv(results_file, index=False)
Пример #31
0
def generate_train_dataset(datapath, return_pos=False, test=False):
    """Load a dataset from ``datapath`` and convert it with the parser helpers.

    :param datapath: location passed on to the ``utils`` loader
    :param return_pos: when true, prepend the loader's ``pos`` value to the result
    :param test: when true, load unlabeled test data (``utils.load_test_data``
        + ``parser_x``); otherwise load labeled data
        (``utils.load_data_new`` + ``parser``)
    :return: ``train_x`` (test mode) or ``(train_x, train_y)``; with
        ``return_pos`` the tuple is ``(pos, train_x)`` or
        ``(pos, train_x, train_y)`` respectively
    """
    if test:
        pos, data = utils.load_test_data(datapath)
        features = parser_x(data)
        return (pos, features) if return_pos else features

    pos, data, label = utils.load_data_new(datapath)
    features, targets = parser(data, label)
    if return_pos:
        return pos, features, targets
    return features, targets
Пример #32
0
    def anime2selfie(self, file_input):
        """Translate one input image with ``self.test_fake_A`` and save it.

        Variables are initialised, a checkpoint is restored from
        ``self.checkpoint_dir`` and, if that succeeds, the image loaded from
        ``file_input`` is fed through the graph; the output is written to
        ``/tmp/a.jpg``. Nothing is run or saved when the checkpoint cannot
        be loaded.

        :param file_input: image location handed to ``load_test_data``
        :return: None in every case
        """
        init_op = tf.global_variables_initializer()
        init_op.run(session=self.sess)
        self.saver = tf.train.Saver()

        could_load, checkpoint_counter = self.load(self.checkpoint_dir)
        if not could_load:
            print(" [!] Load failed...")
            return None
        print(" [*] Load SUCCESS")

        loaded = load_test_data(file_input, size=self.img_size)
        sample_image = np.asarray(loaded)
        feed = {self.test_domain_B: sample_image}
        fake_img = self.sess.run(self.test_fake_A, feed_dict=feed)

        save_images(fake_img, [1, 1], '/tmp/a.jpg')
        return None
Пример #33
0
def load_data(params):
    '''
    Assemble everything the experiment needs from the three loader helpers:
    1. public data: shared attribute data (POI matrices/dicts, ``s_u``, ``s_KG``)
    2. train data: wrapped in the training ``DataLoader``
    3. test data: wrapped in the test ``DataLoader``
    :param params: configuration object; passed to the loaders and read for
        ``data_batch_size``
    :return: ``(train_loader, test_loader, poi_info, user_KG)``
    '''
    (poi_dist_mat, cat_sim_mat, poi_cat_dict,
     poi_loc_dict, s_u, s_KG) = load_public_data(params)
    train_pois, train_users, train_ctx = load_train_data(params)
    test_pois, test_users, test_ctx = load_test_data(params)

    # Cast everything numeric to float so the dtypes agree
    train_ctx = train_ctx.float()
    test_ctx = test_ctx.float()
    s_u = s_u.float()
    s_KG.x = s_KG.x.float()
    s_KG.edge_attr = s_KG.edge_attr.float()

    # Datasets are ordered (user, poi, temporal context).
    train_dataset = TensorDataset(train_users, train_pois, train_ctx)
    test_dataset = TensorDataset(test_users, test_pois, test_ctx)

    train_loader = DataLoader(
        train_dataset,
        batch_size=params.data_batch_size,
        collate_fn=collate_wrapper,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=params.data_batch_size,
        collate_fn=collate_wrapper,
        pin_memory=True,
    )

    # Row count of the first training context once viewed as (-1, 3).
    context_rows = train_ctx[0].view(-1, 3).shape[0]
    poi_info = POI_Info(poi_dist_mat, cat_sim_mat, poi_cat_dict,
                        poi_loc_dict, context_rows)
    user_KG = User_KG(s_u, s_KG)

    return train_loader, test_loader, poi_info, user_KG
Пример #34
0
    def test_hashes(self, tmpdir):
        """Check the trie root hash for every case in ``trietest.txt``.

        For each case a new ``trie.Trie`` is created in a randomly named
        sub-directory of ``tmpdir``, filled with the case's ``inputs`` and its
        hex-encoded root is compared with the case's ``expectation``.
        """
        # `tmpdir` is a `py.path.local` object which offers `os.path` methods,
        # see http://pytest.org/latest/tmpdir.html for details.
        for case in utils.load_test_data("trietest.txt"):
            inputs, expected = case["inputs"], case["expectation"]

            # Build the trie in its own database directory
            db_name = "tdb-%s" % random.randrange(1000000)
            db_dir = tmpdir.ensure(db_name, dir=True)
            current_trie = trie.Trie(db_dir.strpath)
            for key, value in inputs.items():
                current_trie.update(key, value)

            # Compare the resulting root hash with the expectation
            actual = current_trie.root.encode('hex')
            failure_message = (
                "inputs='%s', expected='%s', actual='%s'" %
                (inputs, expected, actual))
            assert expected == actual, failure_message
Пример #35
0
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
FLAGS = tf.flags.FLAGS
# NOTE(review): `_parse_flags()` and `__flags` are private members of the
# flags object -- confirm they exist in the TensorFlow version in use.
FLAGS._parse_flags()
# Echo every flag as NAME=value, sorted by flag name.
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

# Load data
print("Loading data...")
# The vocabulary returned with the embeddings is passed to both loaders,
# together with the configured sequence length.
vocab, embeddings = utils.load_embeddings()
train_data = utils.load_train_data(vocab, FLAGS.sequence_length)
test_data = utils.load_test_data(vocab, FLAGS.sequence_length)
print("Load done...")

# Training
# ==================================================

prev_auc = 0
with tf.Graph().as_default():
  with tf.device("/gpu:1"):
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = InsQACNN(
            _margin=FLAGS.margin,
Пример #36
0
import pickle
from utils import load_test_data, dependencies
from algorithm import Model
from optparse import OptionParser

parser = OptionParser()
parser.add_option("-i", "--input", action="store", type="string", dest="input", default="example_test.txt")
parser.add_option("-m", "--model", action="store", type="string", dest="model", default="model.pkl")
parser.add_option("-o", "--output", action="store", type="string", dest="output")

(options, args) = parser.parse_args()

m = pickle.load(open(options.model))
X = load_test_data(options.input)
print "num of testing samples:", len(X), "\n"


if options.output:
    f = open(options.output, "wb")
    for i, _ in enumerate(m.predict_proba(X)):
        f.write(str(i) + "\n")
        for (p, a) in reversed(sorted(zip(_, m.classes_))):
            f.write(a + " : " + str(p) + "\n")
        f.write("--\n")
else:
    for i, _ in enumerate(m.predict_proba(X)):
        print i
        for (p, a) in reversed(sorted(zip(_, m.classes_))):
            print a, ":", p
        print "--"