import pytest
from numpy.testing import assert_equal

# The imports below assume FreeDiscovery's package layout; `check_cache`
# and `data_dir` are helpers defined elsewhere in the test suite
# (`check_cache` returns a temporary cache directory, `data_dir` points
# at a directory of sample text files).
from freediscovery.text import FeatureVectorizer
from freediscovery.exceptions import NotFound


# Parametrization assumed so pytest supplies the `use_hashing` argument.
@pytest.mark.parametrize('use_hashing', [True, False])
def test_search_filenames(use_hashing):
    cache_dir = check_cache()

    fe = FeatureVectorizer(cache_dir=cache_dir, mode='w')
    uuid = fe.setup(use_hashing=use_hashing)
    # Raw string so the `\d` regex escape survives as written
    fe.ingest(data_dir, file_pattern=r'.*\d.txt')

    assert fe.db_ is not None

    # Check that filename -> index lookups round-trip for several slices,
    # including a reversed one
    for low, high, step in [(0, 1, 1),
                            (0, 4, 1),
                            (3, 1, -1)]:
        idx_slice = list(range(low, high, step))
        filenames_slice = [fe.filenames_[idx] for idx in idx_slice]
        idx0 = fe.db_._search_filenames(filenames_slice)
        assert_equal(idx0, idx_slice)
        assert_equal(filenames_slice, fe[idx0])

    # Looking up a filename that was never ingested must fail
    with pytest.raises(NotFound):
        fe.db_._search_filenames(['DOES_NOT_EXIST.txt'])

    if not use_hashing:
        # Feature names are only available without the hashing trick
        n_top_words = 5
        terms = fe.query_features([2, 3, 5], n_top_words=n_top_words)
        assert len(terms) == n_top_words

    fe.list_datasets()
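# Illustrative sketch, not FreeDiscovery's implementation: the round-trip
# asserted above amounts to a filename -> internal row index lookup. A toy
# stand-in for `fe.db_` (the class name and storage are assumptions) could
# look like this:

class _ToyDB:
    """Hypothetical stand-in mapping ingested filenames to row indices."""
    def __init__(self, filenames):
        self._idx = {name: i for i, name in enumerate(filenames)}

    def _search_filenames(self, filenames):
        # Raises KeyError for unknown names (the real db raises NotFound)
        return [self._idx[name] for name in filenames]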
import os


def _list(args):
    cache_dir = _parse_cache_dir(args.cache_dir)
    fe = FeatureVectorizer(cache_dir)
    res = fe.list_datasets()
    # Show the most recently created datasets first
    res = sorted(res, key=lambda row: row['creation_date'], reverse=True)
    for row in res:
        print(' * Processed dataset {}'.format(row['id']))
        print('    - data_dir: {}'.format(row['data_dir']))
        print('    - creation_date: {}'.format(row['creation_date']))
        # List any models computed on top of this dataset
        for method in ['lsi', 'categorizer', 'cluster', 'dupdet', 'threading']:
            dpath = os.path.join(fe.cache_dir, row['id'], method)
            if not os.path.exists(dpath):
                continue
            mid_list = os.listdir(dpath)
            if mid_list:
                print('    # {}'.format(method))
                for mid in mid_list:
                    print('        * {}'.format(mid))
        print(' ')
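# Hypothetical wiring for the `list` subcommand: `_list` expects an
# argparse-style namespace with a `cache_dir` attribute, so it could be
# hooked up as below. The parser name, flag default, and entry point are
# assumptions, not the package's actual CLI setup.
import argparse


def _setup_parser():
    parser = argparse.ArgumentParser(description='FeatureVectorizer dataset tools')
    subparsers = parser.add_subparsers()
    list_parser = subparsers.add_parser('list', help='list processed datasets')
    list_parser.add_argument('--cache-dir', default='/tmp/',
                             help='cache directory used by FeatureVectorizer')
    list_parser.set_defaults(func=_list)
    return parser


if __name__ == '__main__':
    args = _setup_parser().parse_args()
    args.func(args)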