def save_vectors_file():
    """Vectorize every image in the dataset, reduce dimensionality, and save.

    Reads an ``{image_path: gender_id}`` mapping via
    ``load_data(FLAGS.data_path)``, turns each image into a vector with a
    ``Vectorizer``, runs the dimensionality-reduction technique selected by
    ``FLAGS.dim_reduction_technique`` with ``FLAGS.n_dimensions``, and
    writes two pickle files:

    * the fitted reduction model to ``FLAGS.reducter_path``;
    * the ``(reduced_vectors, genders)`` tuple to ``FLAGS.vectors_path``.

    Images that fail to load or vectorize are logged and skipped
    (best-effort), so the saved vectors may cover fewer items than the
    input mapping.
    """
    data = load_data(FLAGS.data_path)

    vectorizer = Vectorizer()

    logging.info('getting vectors')
    img_vectors = []
    genders = []
    try:
        for img_path, gender_id in tqdm(data.items()):
            # Only the two calls that can realistically fail are guarded, so
            # a failure can never leave the two lists with different lengths.
            try:
                img_array = get_img(img_path)
                vector = vectorizer.get_vector(img_array)
            except Exception as e:
                # Deliberate best-effort: one bad image must not abort the run.
                logging.warning('exception: {}'.format(e))
                continue

            img_vectors.append(vector)
            genders.append(gender_id)
    finally:
        # Release the vectorizer even if the loop is interrupted
        # (e.g. KeyboardInterrupt, which `except Exception` does not catch).
        vectorizer.close()

    dim_reduction_technique = get_dim_reduction_technique(
        FLAGS.dim_reduction_technique)

    reduced, model = dim_reduction_technique(img_vectors, FLAGS.n_dimensions)

    save_pkl_file(model, FLAGS.reducter_path)
    save_pkl_file((reduced, genders), FLAGS.vectors_path)
示例#2
0
def main():
    """Predict a label for the image at ``FLAGS.img_path`` and print it.

    Pipeline: load the image with ``get_img``, vectorize it with a
    ``Vectorizer``, project the vector with the reducer loaded from
    ``FLAGS.reducter_path``, and feed the result to the model loaded from
    ``FLAGS.model_path``. The first prediction is printed as
    ``result: <value>``.
    """
    img = get_img(FLAGS.img_path)

    vectorizer = Vectorizer()
    try:
        vector = vectorizer.get_vector(img)
    finally:
        # Always release the vectorizer, even when vectorization fails.
        vectorizer.close()

    # NOTE(review): load_pkl_file presumably unpickles the file; only point
    # these flags at trusted files -- confirm against its implementation.
    reducter = load_pkl_file(FLAGS.reducter_path)
    # transform() is given a one-element batch containing the single vector.
    reduced = reducter.transform([vector])

    model = load_pkl_file(FLAGS.model_path)

    # predict() returns one result per input row; take the only one.
    output = model.predict(reduced)[0]

    print('result: {}'.format(output))
示例#3
0
# Query-side pipeline: preprocess one text, dependency-parse it, vectorize it,
# then load the stored TF-IDF corpus vectors and the IDF vector.
# NOTE(review): PREPROCESSOR, text_path, thesaurus_path, auto_text_path, root,
# kytea_path, eda_path, tree_path, index_path, vector_path, tfidf_DB_path and
# IDF_path are not defined in this fragment -- presumably set earlier in the
# script; confirm.

# Status message: "Running preprocessing".
print('前処理を行います')
PREPROCESSOR.load_text([text_path])
whitelist = PREPROCESSOR.investigate_whitelist(thesaurus_path)
# Status message: "Saving".
print('保存します')
PREPROCESSOR.save(auto_text_path)
PARSER = Parser()
# Status message: "Running dependency parsing..".
print('かかり受け解析を行います..')
PARSER.t2f([auto_text_path + '/' + root + '.text'],
           kytea_model=kytea_path,
           eda_model=eda_path)
# Status message: "Saving the results".
print('結果を保存します')
PARSER.save(tree_path)  # save the dependency-parse output to file
# Status message: "Loading the Index...".
print("Indexを読み込みます...")
VECTORIZER = Vectorizer(index_path, t=1, list=whitelist)  # load the Index
# Status message: "Loading the Tree".
print('Treeを読み込みます')
vectors = VECTORIZER.get_vector([tree_path + '/' + root + '.eda'],
                                filter=3)  # generate the vectors
print(vectors)
# Status message: "Saving the Vector".
print('Vectorを保存します')
VECTORIZER.save(vectors, [vector_path])  # save the vectors

#-----
# Compare the TF-IDF corpus vectors we currently hold against the query
# vector tfidf_vectors.
# NOTE(review): only the loading steps are visible in this fragment; the
# comparison itself presumably follows further down -- confirm.
#----

# Status message: "Loading the TFIDF corpus Vectors".
print('TFIDF corpus Vectorsを読み込みます')
tfidf_corpus_vectors = VECTORIZER.load(
    sorted(glob.glob(tfidf_DB_path + '/*.vector')))
print(tfidf_corpus_vectors)

# Status message: "Loading the IDF Vector".
print('IDF Vectorを読み込みます')
IDF_vector = VECTORIZER.load_IDF(IDF_path)
示例#4
0
           kytea_model=kytea_path,
           eda_model=eda_path)  # text_pathのファイルをかかり受け解析
# Corpus-side pipeline: save the parse results, build and save an Index from
# the tree files, vectorize every tree file, save one .vector file per tree,
# then compute and save the IDF.
# NOTE(review): PARSER, tree_path, index_path, whitelist and
# vector_folder_path are not defined in this fragment -- presumably set
# earlier in the script; confirm.

# Status message: "Saving the results".
print('結果を保存します')
PARSER.save(tree_path)  # save the dependency-parse output to file
# Create the Index by reading unigram and bigram features out of the trees.
# (The call also enables the dep_trigram and dep_bigram options.)
INDEX = Index(unigram=1, dep_trigram=1, bigram=1,
              dep_bigram=1)
# Status message: "Loading the Tree".
print('Treeを読み込みます')
# Build the index from every file under the tree_path folder.
INDEX.add_index(sorted(glob.glob(tree_path +
                                 '/*')))
# Status message: "Saving the INDEX...".
print('INDEXを保存します...')
INDEX.save(index_path)  # save the index to index_path
print(index_path)
# Status message: "Loading the Index...".
print("Indexを読み込みます...")
VECTORIZER = Vectorizer(index_path, t=1, list=whitelist)  # load the Index  # threshold is 1
# Status message: "Loading the Tree".
print('Treeを読み込みます')
vectors = VECTORIZER.get_vector(sorted(glob.glob(tree_path + '/*')),
                                filter=3)  # generate the vectors
print(vectors)
# Status message: "Saving the Vector".
print('Vectorを保存します')
# Re-list the tree files in the same sorted order used for get_vector above,
# and derive one output path per input file.
filename_list = sorted(glob.glob(tree_path + '/*'))
vector_path_list = []
for filename in filename_list:
    base_name = os.path.basename(filename)  # e.g. A.text
    root = os.path.splitext(base_name)[0]  # e.g. A
    file_name = vector_folder_path + '/' + root + '.vector'
    vector_path_list.append(file_name)
VECTORIZER.save(vectors, vector_path_list)  # save the vectors
print(vector_path_list)

# Compute the IDF from the vectors and save it to a hard-coded relative path.
IDF = VECTORIZER.calculate_IDF(vectors)
IDF_path = '../auto/IDF.index'
VECTORIZER.save_IDF(IDF, IDF_path)