示例#1
0
def test_initialize():
    """Check that a CRFDataset reports the dictionaries it was built with.

    Two ``CRFDict`` objects are created from ``KEYS1`` (attributes) and
    ``KEYS2`` (labels) and handed to ``CRFDataset``. The dataset's feature
    and label lists are then compared, position by position, against the
    original key sequences.
    """
    feature_dict = CRFDict(KEYS1)
    label_dict = CRFDict(KEYS2)

    dataset = CRFDataset(feature_dict, label_dict)

    # Every key must sit at the same position in the reported feature list.
    reported_features = dataset.get_feature_list()
    for position, expected in enumerate(KEYS1):
        assert expected == reported_features[position]

    # Same positional check for the label dictionary.
    reported_labels = dataset.get_label_list()
    for position, expected in enumerate(KEYS2):
        assert expected == reported_labels[position]
示例#2
0
def test_algorithms(n_samples=100, n_features=200, n_instances=10):
    """Smoke-test that training runs for every entry in ``ALGORITHMS``.

    A random matrix is generated, values below 0.8 are zeroed, and the
    result is converted to CSR form. Together with random integer labels in
    ``[0, 10)`` it is loaded into a ``CRFDataset``, and a ``CRFTrainer`` is
    then trained once per algorithm. The test passes when no call raises.

    Parameters
    ----------
    n_samples : int
        Number of rows in the generated matrix and number of labels.
    n_features : int
        Number of columns in the generated matrix.
    n_instances : int
        Divisor used to derive the instance offsets: ``n_samples //
        n_instances`` evenly spaced offsets in ``[0, n_samples)`` are
        passed to ``add_group_from_array``.
    """
    X = np.random.random((n_samples, n_features))
    X[X < 0.8] = 0  # keep only the largest values so the matrix is sparse
    X = csr_matrix(X)

    labels = np.random.randint(10, size=n_samples)

    # ``np.linspace`` needs an integer ``num``. True division would hand it
    # a float under Python 3, so use floor division instead.
    n_points = n_samples // n_instances + 1
    # Drop the final point (== n_samples) so every offset is a valid row.
    instances = np.linspace(0, n_samples, n_points)[:-1]
    instances = np.round(instances)

    data = CRFDataset()
    data.add_group_from_array(X, labels, instances)

    for algorithm in ALGORITHMS:
        trainer = CRFTrainer(algorithm=algorithm, quiet=True)
        trainer.train(data)
示例#3
0
def test_matrix_conversion(n_samples=50, n_features=100, n_instances=10):
    """Test conversion of a CSR matrix to and from a CRFDataset.

    A random matrix is generated, values below 0.8 are zeroed, and the
    result is converted to CSR form. It is loaded into a ``CRFDataset`` with
    random labels and then read back with ``to_matrix()``; the dense form of
    the result must match the dense form of the input.

    Parameters
    ----------
    n_samples : int
        Number of rows in the generated matrix and number of labels.
    n_features : int
        Number of columns in the generated matrix.
    n_instances : int
        Divisor used to derive the instance offsets: ``n_samples //
        n_instances`` evenly spaced offsets in ``[0, n_samples)`` are
        passed to ``add_group_from_array``.
    """
    X = np.random.random((n_samples, n_features))
    X[X < 0.8] = 0  # keep only the largest values so the matrix is sparse

    X = csr_matrix(X)

    labels = np.random.randint(len(KEYS1), size=n_samples)

    # ``np.linspace`` needs an integer ``num``. True division would hand it
    # a float under Python 3, so use floor division instead.
    n_points = n_samples // n_instances + 1
    # Drop the final point (== n_samples) so every offset is a valid row.
    instances = np.linspace(0, n_samples, n_points)[:-1]
    instances = np.round(instances)

    data = CRFDataset()
    data.add_group_from_array(X, labels, instances)

    mat = data.to_matrix()

    assert_array_almost_equal(mat.toarray(), X.toarray())
示例#4
0
文件: test.py 项目: TPLink32/nlp
from crfsuite import crfsuite_learn, CRFDataset, CRFDict, CRFTagger

# Dimensions of the synthetic problem.
n_samples = n_features = 100
n_instances = n_labels = 10

# Number of feature columns drawn (with replacement) for each sample.
features_per_sample = 5

# Synthetic design matrix: start from zeros, then bump the randomly chosen
# columns of every row by one.
data = np.zeros(shape=(n_samples, n_features))
indices = np.random.randint(n_features, size=(n_samples, features_per_sample))
data[np.arange(n_samples)[:, np.newaxis], indices] += 1

# One random label per sample, plus evenly spaced integer offsets over the
# sample range (the end point n_samples itself is dropped).
labels = np.random.randint(n_labels, size=n_samples)
instances = np.floor(
    np.linspace(0, n_samples, n_instances + 1)[:-1]
).astype(int)

# The synthetic arrays above are not consumed right now: the in-memory
# variant below is disabled and the training set is read from disk instead.
# crf_data = CRFDataset().add_group_from_array(data, labels, instances)
crf_data = CRFDataset().add_groups_from_files('example_files/train_small.txt')

# Train a model, load the test set through it, and tag that test set.
model = crfsuite_learn(crf_data)
crf_data_test = model.get_tagging_data_from_file('example_files/test_small.txt')

tagger = model.get_tagger()
output = tagger.tag(crf_data_test)