def post(self):
    """Handle a recipe-quality prediction request.

    Expects a JSON body with:
      - ``ingredients``: comma-separated ingredient names
      - ``cookingSteps``: free-text cooking instructions

    Returns a Flask JSON response containing the predicted label
    ("Good Recipe" / "Bad Recipe"), or a 500-style payload on failure.
    """
    try:
        form_data = request.json

        # Vectorize each ingredient with the fitted CountVectorizer,
        # then collapse the per-ingredient rows into a single
        # bag-of-words vector (column-wise sum).
        ingredients = [item.strip()
                       for item in form_data['ingredients'].split(',')]
        count_matrix = vectorizer.transform(ingredients).toarray().astype(int)
        x_ingredients = count_matrix.sum(axis=0)

        # Clean + lemmatize the cooking steps, then TF-IDF encode them
        # with the vectorizer fitted at training time.
        cooking_steps = lemmatize_text(clean_text(form_data['cookingSteps']))
        x_cooking = tfidf_vectorizer.transform([cooking_steps]).toarray()[0]

        # Concatenate both feature groups in the same order used
        # during training, then predict.
        x = np.concatenate([x_ingredients, x_cooking])
        prediction = classifier.predict(x.reshape(1, -1))
        types = {0: "Bad Recipe", 1: "Good Recipe"}

        response = jsonify({
            "statusCode": 200,
            "status": "Prediction made",
            "result": "Recipe Quality Prediction: " + types[prediction[0]]
        })
        response.headers.add('Access-Control-Allow-Origin', '*')
        return response
    except Exception as error:
        # Top-level API boundary: report the failure rather than crash.
        response = jsonify({
            "statusCode": 500,
            "status": "Could not make prediction",
            "error": str(error)
        })
        # BUG FIX: the error path previously omitted the CORS header,
        # so browser clients could not read the error payload.
        response.headers.add('Access-Control-Allow-Origin', '*')
        return response
def parse(self, content_list, image_list, feeder_running, parser_running):
    """Worker loop: pop post URLs from ``content_list``, scrape each
    Cyworld post, and append one record per image to ``image_list``.

    Args:
        content_list: shared list of post URLs produced by the feeder.
        image_list: shared list receiving dicts with keys
            ``title``/``date``/``content``/``src`` for the downloader.
        feeder_running: shared flag (read via ``.value``); the loop runs
            while it is truthy or URLs remain.
        parser_running: shared flag cleared to 0 on exit so downstream
            workers know parsing has finished.
    """
    name = current_process().name
    self._logger.info(name, '크롬 드라이버 로딩 중..')
    parser_driver = webdriver.Chrome(self._chromedriver)
    parser_driver.implicitly_wait(5)
    # Cookies can only be attached after the target domain is loaded.
    parser_driver.get('https://cyworld.com')
    for cookie in self._cookie:
        parser_driver.add_cookie(cookie)
    self._logger.info(name, '크롬 드라이버 로딩 완료')
    try:
        # Keep working while the feeder may still add URLs or work remains.
        while feeder_running.value or len(content_list) != 0:
            try:
                if len(content_list) != 0:
                    # Pop a post URL from the shared list and open it.
                    target_url = content_list.pop(0)
                    self._logger.info(name, target_url)
                    parser_driver.get(target_url)

                    # Extract the elements we need from the post page.
                    date = parser_driver \
                        .find_element_by_css_selector('div.view1 p')
                    images = parser_driver \
                        .find_elements_by_css_selector('section.imageBox')
                    texts = parser_driver \
                        .find_elements_by_css_selector('section.textBox')

                    # Original post title.
                    title = parser_driver \
                        .find_element_by_id('cyco-post-title') \
                        .get_attribute('innerText')
                    # Title sanitized for use as a file name.
                    preprocessed_title = to_valid_filename(title)
                    # Upload date of the post.
                    post_date = extract_date(date.get_attribute('innerText'))

                    # Merge all non-empty text sections into one body.
                    post_text = '[ {} ]\n\n'.format(title)
                    for text in texts:
                        current_text = text.get_attribute('innerText').strip()
                        if len(current_text):
                            post_text += clean_text(current_text) + '\n'

                    # Collect every image: one record per <img> tag.
                    for image in images:
                        imgs = image.find_elements_by_tag_name('img')
                        for img in imgs:
                            src = update_size(img.get_attribute('src'))
                            image_list.append({
                                'title': preprocessed_title,
                                'date': post_date,
                                'content': post_text,
                                'src': src
                            })
                    self._logger.info(
                        name, '{}_{} 포스트 파싱 됨'.format(post_date, title))
                # Brief pause to avoid overloading the Cyworld servers.
                time.sleep(1)
            except Exception as e:
                # Log and continue: one bad post must not kill the worker.
                self._logger.error(str(e))
    finally:
        # BUG FIX: cleanup now runs even if the loop itself raises, and
        # quit() (not close()) fully shuts down the chromedriver process
        # instead of only closing the current window.
        parser_running.value = 0
        parser_driver.quit()
        self._logger.info(name, '종료')
# --- Ingredient features: bag-of-words counts over the ingredient text ---
vectorizer = CountVectorizer()
X_count = vectorizer.fit_transform(df['recipe_ingredients'])
countvector = X_count.toarray().astype(int)
df['ingedients_vector'] = countvector.tolist()

# Vocabulary learned from the ingredient corpus.
ingredients = vectorizer.get_feature_names()

# --- Cooking-step features: join -> clean -> lemmatize -> TF-IDF ---
# The raw column stores a string-encoded list of steps; parse it and
# join the steps into a single sentence before text normalization.
df['cooking_steps_all'] = df['cooking_steps'].apply(
    lambda steps: " ".join(ast.literal_eval(steps)))
df['cooking_steps_clean'] = df['cooking_steps_all'].apply(clean_text)
df['cooking_steps_lemma'] = df['cooking_steps_clean'].apply(lemmatize_text)

tfidf_vectorizer, x_tfidf, features = tfidf(df['cooking_steps_lemma'])
df_cooking_steps = pd.DataFrame(x_tfidf.toarray(), columns=features)
df = pd.concat([df, df_cooking_steps], axis=1)

# --- Target ---
df['is_good_recipe'] = df['is_good_recipe'].astype(int)

# --- Split the dataset into features and labels ---
y = np.array(df['is_good_recipe'])
x_ingredients = np.array(
    df['ingedients_vector'].tolist())  # ingedients_tfidf_vector
x_cooking = df[features].to_numpy()