def save_vectors_file():
    """Vectorize the dataset images, reduce their dimensionality and pickle the results.

    Reads the dataset with ``load_data(FLAGS.data_path)`` and iterates its
    ``(img_path, gender_id)`` items. Each image is loaded with ``get_img`` and
    turned into a vector by ``Vectorizer.get_vector``. Images that raise during
    loading or vectorizing are logged and skipped, so the saved vectors and
    gender labels stay aligned with each other.

    The collected vectors are passed to the reduction callable selected by
    ``FLAGS.dim_reduction_technique`` together with ``FLAGS.n_dimensions``.
    The returned model is saved to ``FLAGS.reducter_path`` and the
    ``(reduced, genders)`` tuple to ``FLAGS.vectors_path``.
    """
    data = load_data(FLAGS.data_path)
    vectorizer = Vectorizer()

    img_vectors = []
    genders = []
    # try/finally guarantees the vectorizer is closed even when something other
    # than a per-image failure escapes the loop (e.g. KeyboardInterrupt).
    try:
        logging.info('getting vectors')
        for img_path, gender_id in tqdm(data.items()):
            try:
                img_array = get_img(img_path)
                vector = vectorizer.get_vector(img_array)
            except Exception as e:
                # Best effort: a single bad image must not abort the whole run.
                logging.warning('exception: {}'.format(e))
            else:
                # Append both together so vectors and labels never go out of sync.
                img_vectors.append(vector)
                genders.append(gender_id)
    finally:
        vectorizer.close()

    dim_reduction_technique = get_dim_reduction_technique(
        FLAGS.dim_reduction_technique)
    reduced, model = dim_reduction_technique(img_vectors, FLAGS.n_dimensions)

    save_pkl_file(model, FLAGS.reducter_path)
    save_pkl_file((reduced, genders), FLAGS.vectors_path)
def main():
    """Predict the label for the image at ``FLAGS.img_path`` and print it.

    The image is loaded with ``get_img`` and vectorized with ``Vectorizer``.
    The vector is transformed by the reducer unpickled from
    ``FLAGS.reducter_path`` and then passed to the model unpickled from
    ``FLAGS.model_path``. The first element of the prediction is printed as
    ``result: <output>``.
    """
    img = get_img(FLAGS.img_path)

    vectorizer = Vectorizer()
    # Close the vectorizer even if vectorizing the image fails.
    try:
        vector = vectorizer.get_vector(img)
    finally:
        vectorizer.close()

    reducter = load_pkl_file(FLAGS.reducter_path)
    # transform() is given a one-element batch, hence the list wrapper.
    reduced = reducter.transform([vector])

    model = load_pkl_file(FLAGS.model_path)
    output = model.predict(reduced)[0]
    print('result: {}'.format(output))
# --- Step 1: load the input text, obtain the whitelist, save the result ---
print('前処理を行います')
PREPROCESSOR.load_text([text_path])
whitelist = PREPROCESSOR.investigate_whitelist(thesaurus_path)
print('保存します')
PREPROCESSOR.save(auto_text_path)

# --- Step 2: dependency-parse the saved text and store the parse ---
PARSER = Parser()
print('かかり受け解析を行います..')
query_text_file = auto_text_path + '/' + root + '.text'
PARSER.t2f(
    [query_text_file],
    kytea_model=kytea_path,
    eda_model=eda_path,
)
print('結果を保存します')
# Write the dependency-parsed output to files under tree_path.
PARSER.save(tree_path)

# --- Step 3: build the vector for this text from the stored index ---
print("Indexを読み込みます...")
# Loads the index from index_path.
VECTORIZER = Vectorizer(index_path, t=1, list=whitelist)
print('Treeを読み込みます')
query_tree_file = tree_path + '/' + root + '.eda'
# Generate the vectors from the parsed tree file.
vectors = VECTORIZER.get_vector([query_tree_file], filter=3)
print(vectors)
print('Vectorを保存します')
# Persist the generated vectors.
VECTORIZER.save(vectors, [vector_path])

# -----
# Compare the TF-IDF corpus vectors we currently hold against the
# query vector (tfidf_vectors).
# -----
print('TFIDF corpus Vectorsを読み込みます')
corpus_vector_files = sorted(glob.glob(tfidf_DB_path + '/*.vector'))
tfidf_corpus_vectors = VECTORIZER.load(corpus_vector_files)
print(tfidf_corpus_vectors)
print('IDF Vectorを読み込みます')
IDF_vector = VECTORIZER.load_IDF(IDF_path)
# NOTE(review): this span starts mid-statement. The line below is the tail of a
# call whose opening is outside the visible source.
kytea_model=kytea_path, eda_model=eda_path)  # dependency-parse the file(s) at text_path
print('結果を保存します')
# Save the dependency-parsed output to files under tree_path.
PARSER.save(tree_path)

# Create the Index with the unigram, bigram, dep_bigram and dep_trigram
# options switched on; its features are read from the parsed trees below.
INDEX = Index(unigram=1, dep_trigram=1, bigram=1, dep_bigram=1)
print('Treeを読み込みます')
# Build the index from every file under the tree_path folder (sorted order).
INDEX.add_index(sorted(glob.glob(tree_path + '/*')))
print('INDEXを保存します...')
# Save the index to index_path.
INDEX.save(index_path)
print(index_path)

print("Indexを読み込みます...")
# Load the index back; the threshold (t) is 1.
VECTORIZER = Vectorizer(index_path, t=1, list=whitelist)
print('Treeを読み込みます')
# Generate one set of vectors from all tree files, in sorted filename order.
vectors = VECTORIZER.get_vector(sorted(glob.glob(tree_path + '/*')), filter=3)
print(vectors)

print('Vectorを保存します')
# Derive one output path per tree file: <vector_folder_path>/<stem>.vector.
# NOTE(review): assumes get_vector returns vectors in the same order as this
# sorted file list, so that vectors and vector_path_list line up - TODO confirm.
filename_list = sorted(glob.glob(tree_path + '/*'))
vector_path_list = []
for filename in filename_list:
    base_name = os.path.basename(filename)  # e.g. A.text
    root = os.path.splitext(base_name)[0]  # e.g. A
    file_name = vector_folder_path + '/' + root + '.vector'
    vector_path_list.append(file_name)
# Save the vectors to the paths collected above.
VECTORIZER.save(vectors, vector_path_list)
print(vector_path_list)

# Compute the IDF from the generated vectors and save it to a fixed path.
IDF = VECTORIZER.calculate_IDF(vectors)
IDF_path = '../auto/IDF.index'
VECTORIZER.save_IDF(IDF, IDF_path)