# Build a vector for the parsed query, weight it by the stored IDF vector,
# and save both the raw and the TF-IDF-weighted versions.
# NOTE(review): this section was collapsed onto a single physical line, so
# everything after the first '#' was being read as a comment; the statement
# boundaries below are restored from the code's syntax.

PARSER.save(tree_path)  # save the dependency-parsed result to a file

print("Indexを読み込みます...")
# Load the index.
# NOTE(review): the meaning of t=1 and of the whitelist passed as `list` is
# defined by Vectorizer, which is not visible here -- confirm.
VECTORIZER = Vectorizer(index_path, t=1, list=whitelist)

print('Treeを読み込みます')
# Generate the vector(s) from the '<tree_path>/<root>.eda' file.
# NOTE(review): filter=3 is passed through to get_vector unchanged; its
# semantics are not visible here.
vectors = VECTORIZER.get_vector([tree_path + '/' + root + '.eda'], filter=3)
print(vectors)

print('Vectorを保存します')
VECTORIZER.save(vectors, [vector_path])  # save the vectors

# -----
# Compare the TF-IDF corpus vectors we currently hold with the query
# vector tfidf_vectors.
# -----
print('TFIDF corpus Vectorsを読み込みます')
# Sorting the glob result makes the load order independent of the
# filesystem's directory listing order.
tfidf_corpus_vectors = VECTORIZER.load(
    sorted(glob.glob(tfidf_DB_path + '/*.vector')))
print(tfidf_corpus_vectors)

print('IDF Vectorを読み込みます')
IDF_vector = VECTORIZER.load_IDF(IDF_path)
print(IDF_vector)

print('クエリをTFIDF化します...')
# Weight the query vector(s) by the IDF vector.
# NOTE(review): presumably an element-wise product -- depends on the types
# returned by get_vector / load_IDF; confirm.
tfidf_vectors = vectors * IDF_vector
print(tfidf_vectors)

print('クエリのTFIDF Vectorを保存します:保存先は')
print(tfidf_vector_path)
VECTORIZER.save(tfidf_vectors, [tfidf_vector_path])

# The similarity computation announced here lies beyond this section.
print('類似度')
# Look up one applicant (id given as the first command-line argument),
# vectorise that applicant's text with the stored vectorizer, and compute its
# cosine similarity against every stored job TF-IDF entry.
# NOTE(review): this section was collapsed onto a single physical line, so
# everything after the first '#' was being read as a comment; the statement
# boundaries below are restored from the code's syntax.

# Nothing to do without an applicant id argument.
if len(sys.argv) == 1:
    sys.exit()

# Load Raw Data Files
df_users = pd.read_csv(get_data_file_path('users'))
df_jobs = pd.read_csv(get_raw_file_path('jobs'))

u = int(sys.argv[1])  # e.g. 326

# Positional row index of the first user whose applicant_id equals u.
matches = np.where(df_users['applicant_id'] == u)[0]
if matches.size == 0:
    # Fail with a readable message instead of an IndexError on matches[0].
    sys.exit('applicant_id {} not found in users data'.format(u))
index = matches[0]
# Double brackets keep the selection a one-row DataFrame rather than a Series.
user_query = df_users.iloc[[index]]

print('[User Information -------------------------------]')
print(df_users[df_users['applicant_id'] == u])

vectorizer = Vectorizer.load(get_pickle_file_path('vectorizer'))
# Close the pickle file deterministically instead of leaking the handle.
# NOTE(review): pickle.load executes arbitrary code from the file; this
# assumes the pickle is a trusted, locally generated artifact.
with open(get_pickle_file_path('tfidf'), "rb") as tfidf_file:
    tfidf_jobs = pickle.load(tfidf_file)
tfidf_user = vectorizer.transform((user_query['text']))
# One cosine_similarity result per entry yielded by iterating tfidf_jobs.
cos_similarity_tfidf = [
    cosine_similarity(tfidf_user, job_vector) for job_vector in tfidf_jobs
]


# NOTE(review): the definition below continues past the end of the visible
# section (the `scores` parameter is not used in the part shown). Its visible
# statements are reproduced unchanged; only the line structure is restored.
# It reads the module-level applicant id `u`.
def get_recommendation(top, df_all, scores):
    recommendation = pd.DataFrame(columns = ['ApplicantID', 'JobID', 'title', 'job_description', 'score'])
    count = 0
    for i in top:
        recommendation.at[count, 'ApplicantID'] = u
        recommendation.at[count, 'JobID'] = df_all['Job.ID'][i]
        recommendation.at[count, 'title'] = df_all['Title'][i]
        recommendation.at[count, 'job_description'] = df_all['Job.Description'][i]