def test_cluster1():
    """Cluster the items captured by ``Pipeline`` and check result sizes.

    Every captured item is put into a ``Cluster`` built with ``size=500``,
    ``cluster()`` is run, and the sizes of ``clusters``, ``vectors``,
    ``unique`` and ``dumps`` are compared against ``len(test_text)``.
    """
    vector_size = 500
    engine = Cluster(epoch=32, size=vector_size, tokenizer="tokenize")
    source = Pipeline()

    for entry in source.capture_item():
        engine.put_item(entry)
    engine.cluster()

    # Disabled similarity checks, kept for reference:
    # assert '캠프' in list(map(list, zip(*cluster.similar('노무현'))))[0]
    # assert '사건' in list(map(list, zip(*cluster.similar('박근혜'))))[0]

    expected_count = len(test_text)
    assert len(engine.clusters) == expected_count
    assert engine.vectors.shape == (expected_count, vector_size)
    assert len(engine.unique) <= expected_count
    assert len(engine.unique) == len(engine.dumps)

    for group in engine.dumps:
        # Transpose the dump into three parallel sequences; the unpacking
        # fails unless exactly three sequences result.
        members, _vectors, _counter = zip(*group)
        assert all(isinstance(member, Item) for member in members)
        source.dress_item(members)
def test_app3():
    """Run cluster -> extract -> dress over the items from ``PipelineCsv``.

    Items captured from ``PipelineCsv(test_csv)`` are clustered, each dump
    is passed through ``Extractor.dump`` and the result is handed back to
    the pipeline's ``dress_item``. The cluster distribution is printed at
    the end; there are no explicit assertions.
    """
    csv_pipeline = PipelineCsv(test_csv)
    engine = Cluster(tokenizer="tokenize")

    for entry in csv_pipeline.capture_item():
        engine.put_item(entry)
    engine.cluster()

    extractor = Extractor(engine)
    for position, group in enumerate(engine.dumps):
        # Transpose the dump into three lists; the unpacking fails unless
        # exactly three sequences result.
        transposed = zip(*group)
        _items, _vectors, _counter = map(list, transposed)
        summary = extractor.dump(position)
        csv_pipeline.dress_item(summary)

    print(engine.distribution)
def test_app1():
    """Run cluster -> extract -> dress over the items from ``PipelineFile``.

    Items captured from ``PipelineFile`` are clustered with the "stemize"
    tokenizer. For every dump the result of ``Extractor.dump`` must expose
    ``keywords`` as a list before it is passed to ``dress_item``.
    """
    file_pipeline = PipelineFile()
    engine = Cluster(epoch=32, tokenizer="stemize")

    for entry in file_pipeline.capture_item():
        engine.put_item(entry)
    engine.cluster()

    extractor = Extractor(engine)
    for position, group in enumerate(engine.dumps):
        # Transpose the dump into three lists; the unpacking fails unless
        # exactly three sequences result.
        transposed = zip(*group)
        _items, _vectors, _counter = map(list, transposed)
        summary = extractor.dump(position)
        assert isinstance(summary.keywords, list)
        file_pipeline.dress_item(summary)
def test_extractor1():
    """Check what ``Extractor.dump`` returns for every cluster dump.

    Items captured by ``Pipeline`` are clustered, then for each dump index
    the extracted object must be an ``Item`` whose ``keywords`` attribute
    is a list of length 32.
    """
    expected_keys = {'items', 'vectors', 'counter', 'center', 'keywords'}

    source = Pipeline()
    engine = Cluster(epoch=32, tokenizer="tokenize")
    for entry in source.capture_item():
        engine.put_item(entry)
    engine.cluster()

    extractor = Extractor(engine)
    for position, group in enumerate(engine.dumps):
        # Transpose the dump into three lists; the unpacking fails unless
        # exactly three sequences result.
        _items, _vectors, _counter = map(list, zip(*group))

        # NOTE(review): `extractable` is not assigned anywhere in this
        # function; unless it is defined at module scope this line raises
        # NameError -- confirm what object was meant here.
        assert expected_keys == set(extractable.s.keys())

        summary = extractor.dump(position)
        assert isinstance(summary, Item)
        assert isinstance(summary.keywords, list)
        assert len(summary.keywords) == 32