示例#1
0
def send_tfidf_creation_task(project_pk):
    """Build a project's tf-idf matrix and vectorizer, then persist both.

    Args:
        project_pk: Project identifier forwarded unchanged to the
            create/save helpers (presumably the project's primary key).

    Returns:
        The value returned by ``save_tfidf_matrix`` for the saved matrix.
    """
    # Kept as a function-local import, exactly as the original does
    # (presumably to avoid a circular import at module load -- confirm).
    from core.utils.utils_model import (
        create_tfidf_matrix,
        save_tfidf_matrix,
        save_tfidf_vectorizer,
    )

    matrix, vectorizer = create_tfidf_matrix(project_pk)

    # Save the matrix first, then the vectorizer (same order as before).
    saved_matrix = save_tfidf_matrix(matrix, project_pk)
    save_tfidf_vectorizer(vectorizer, project_pk)
    return saved_matrix
示例#2
0
def test_project_gnb_data_tfidf(db, test_profile, tmpdir, settings):
    """Fixture: a "gnb" test project with data and a saved tf-idf matrix.

    Creates the project, adds the rows read from ``test_no_labels.csv``,
    builds the tf-idf matrix for the project, and saves that matrix after
    pointing ``settings.TF_IDF_PATH`` at a temporary ``data/tf_idf``
    directory.

    Args:
        db: Not referenced in the body; requested only for its side effect
            (presumably pytest-django database access -- confirm).
        test_profile: Profile passed to ``create_project``.
        tmpdir: Temporary directory object under which ``data/tf_idf`` is
            created.
        settings: Settings object whose ``TF_IDF_PATH`` is overridden.

    Returns:
        The created project.
    """
    proj = create_project("test_project", test_profile, classifier="gnb")
    test_data = read_test_data_backend(file="./core/data/test_files/test_no_labels.csv")
    add_data(proj, test_data)

    # Only the matrix (first element) is needed; the vectorizer is not saved here.
    matrix = create_tfidf_matrix(proj.pk)[0]

    # Redirect tf-idf storage to a throwaway directory before saving.
    data_temp = tmpdir.mkdir("data").mkdir("tf_idf")
    settings.TF_IDF_PATH = str(data_temp)

    save_tfidf_matrix(matrix, proj.pk)
    return proj
示例#3
0
def test_project_svm_data_tfidf(db, test_profile, tmpdir, settings):
    """Fixture: an "svm" test project with data and a saved tf-idf matrix.

    Creates the project, adds the rows read from ``test_no_labels.csv``,
    builds the tf-idf matrix for the project, and saves that matrix after
    pointing ``settings.TF_IDF_PATH`` at a temporary ``data/tf_idf``
    directory.

    Args:
        db: Not referenced in the body; requested only for its side effect
            (presumably pytest-django database access -- confirm).
        test_profile: Profile passed to ``create_project``.
        tmpdir: Temporary directory object under which ``data/tf_idf`` is
            created.
        settings: Settings object whose ``TF_IDF_PATH`` is overridden.

    Returns:
        The created project.
    """
    proj = create_project("test_project", test_profile, classifier="svm")
    test_data = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv"
    )
    add_data(proj, test_data)

    # Only the matrix (first element) is needed; the vectorizer is not saved here.
    matrix = create_tfidf_matrix(proj.pk)[0]

    # Redirect tf-idf storage to a throwaway directory before saving.
    data_temp = tmpdir.mkdir("data").mkdir("tf_idf")
    settings.TF_IDF_PATH = str(data_temp)

    save_tfidf_matrix(matrix, proj.pk)

    return proj
示例#4
0
def test_tfidf_vectorizer_labeled(test_project_labeled):
    """Return the tf-idf vectorizer built for ``test_project_labeled``.

    Args:
        test_project_labeled: Project whose ``pk`` is passed to
            ``create_tfidf_matrix``.

    Returns:
        The second element of the ``create_tfidf_matrix`` result, i.e. the
        vectorizer rather than the matrix.
    """
    return create_tfidf_matrix(test_project_labeled.pk)[1]
示例#5
0
def test_tfidf_matrix(test_project_data):
    """Return the tf-idf matrix built from the data of ``test_project_data``.

    Args:
        test_project_data: Project whose ``pk`` is passed to
            ``create_tfidf_matrix``.

    Returns:
        The first element of the ``create_tfidf_matrix`` result (described
        upstream as a CSR-format matrix -- not verifiable from this code).
    """
    return create_tfidf_matrix(test_project_data.pk)[0]
示例#6
0
def test_tfidf_matrix_labeled(test_project_labeled):
    """Return the tf-idf matrix built from the data of ``test_project_labeled``.

    Args:
        test_project_labeled: Project whose ``pk`` is passed to
            ``create_tfidf_matrix``.

    Returns:
        The first element of the ``create_tfidf_matrix`` result (described
        upstream as a CSR-format matrix -- not verifiable from this code).
    """
    return create_tfidf_matrix(test_project_labeled.pk)[0]