Python get_test_document_term_map示例

编程语言: Python

命名空间/包名称: text_categorizer.document_vectorization

方法/功能: get_test_document_term_map

hotexamples.com的示例: 2

Python get_test_document_term_map - 共找到2个示例。这些是从开源项目中提取的、最受好评的 text_categorizer.document_vectorization.get_test_document_term_map 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： prepare_resources.py 项目： ragnhildkarlsson/keyword_tc

def prepare_freq_dists(experiment_spec, freq_dists_cache_directory):
    """Compute the frequency-distribution map for the test documents and cache it.

    When the cache already holds an entry for this experiment's map id, a
    notice is printed and nothing else is done.

    Args:
        experiment_spec: Experiment configuration. Its
            ``["training_dataset"]["filters"]`` entry is read directly, and
            the whole spec is passed to the id helpers.
        freq_dists_cache_directory: Cache location handed to
            ``cache.in_cache`` and ``cache.write``.

    Returns:
        None. The computed map is pretty-printed and written to the cache.
    """
    map_id = document_vectorization.get_freq_dist_map_id(experiment_spec)
    if cache.in_cache(freq_dists_cache_directory, map_id):
        print("FREQDISTS stored in cache: " + map_id)
        return

    test_id = dataset_id_handler.get_test_data_id(experiment_spec)
    # NOTE(review): the filter names come from the *training* dataset spec but
    # are applied to the test data -- presumably intentional; confirm.
    training_spec = experiment_spec["training_dataset"]
    filter_names = training_spec["filters"]
    term_map = document_vectorization.get_test_document_term_map(
        test_id, filter_names
    )

    freq_dists = document_vectorization.get_freq_dists_map(
        term_map, ["word", "bigram", "trigram"]
    )
    pprint.pprint(freq_dists)
    cache.write(freq_dists_cache_directory, map_id, freq_dists)

示例#2

显示文件

文件： prepare_resources.py 项目： ragnhildkarlsson/keyword_tc

def prepare_tf_idf_vectors(experiment_spec,tf_idf_cache_dirctory,index_cache_directory):
    """Compute the TF-IDF vector map for the test documents and cache it.

    When the cache already holds an entry for this experiment's TF-IDF map
    id, a notice is printed and nothing else is done.

    Args:
        experiment_spec: Experiment configuration. Its ``"training_dataset"``
            entry (including ``"filters"``) is read directly, and the whole
            spec is passed to the id helpers.
        tf_idf_cache_dirctory: Cache location handed to ``cache.in_cache``
            and ``cache.write`` (the parameter name's spelling is kept for
            caller compatibility).
        index_cache_directory: Passed through to
            ``document_vectorization.get_docs_id_tf_idf_map``.

    Returns:
        None. The computed map is pretty-printed and written to the cache.
    """
    vector_map_id = document_vectorization.get_tf_idf_map_id(experiment_spec)
    if cache.in_cache(tf_idf_cache_dirctory, vector_map_id):
        print("TF_IDF_VECTORS stored in cache: " + vector_map_id)
        return

    test_id = dataset_id_handler.get_test_data_id(experiment_spec)
    # NOTE(review): the filter names come from the *training* dataset spec but
    # are applied to the test data -- presumably intentional; confirm.
    training_spec = experiment_spec["training_dataset"]
    filter_names = training_spec["filters"]
    term_map = document_vectorization.get_test_document_term_map(
        test_id, filter_names
    )
    print("test data preprocessed")

    index_types_by_id = __get_index_id_index_type(training_spec)
    max_freqs = index_factory.create_max_freq_term_by_index_types(
        term_map, ["word", "bigram", "trigram"]
    )
    print("max_freq_map_calculated")

    tf_idf_vectors = document_vectorization.get_docs_id_tf_idf_map(
        term_map, index_types_by_id, index_cache_directory, max_freqs
    )
    pprint.pprint(tf_idf_vectors)
    cache.write(tf_idf_cache_dirctory, vector_map_id, tf_idf_vectors)