Exemplo n.º 1
0
    def crawl(self, food_detail_df):
        """Fetch a recipe for every food, then extract its recurring nouns.

        Phase 1 calls ``self.recipe_finder(food, 2)`` for each ``foodName``
        and stores the stringified result in the ``foodRecipe`` column.
        Phase 2 splits every recipe text into nouns with Hannanum and stores,
        as the ``str()`` of a list, the nouns that occur at least three times
        in that text and contain no digit in the ``foodRecipeNoun`` column.
        Progress is printed to stdout during both phases.

        Args:
            food_detail_df: DataFrame with a ``foodName`` column.  Rows are
                addressed by the labels ``0 .. len - 1``.  Modified in place.

        Returns:
            None.
        """
        total = len(food_detail_df)

        print('[Recipe Web Crawling Start]')
        for index in range(total):
            food = food_detail_df.foodName[index]
            # Must be an attribute call: `self, recipe_finder(...)` would build
            # a (self, result) tuple and store the tuple's repr instead.
            # NOTE(review): assumes recipe_finder is a method of this class;
            # the meaning of the literal 2 is defined there - confirm.
            recipe = self.recipe_finder(food, 2)
            food_detail_df.loc[index, 'foodRecipe'] = str(recipe)

            if (index + 1) % 5 == 0:
                print(round((index + 1) / total * 100, 2), 'percent Done')

        print('Complete!!')
        print('')
        print('[noun extract start]')

        food_detail_df['foodRecipeNoun'] = ''
        # Matches non-empty words made only of non-digit characters.
        digit_free = re.compile(r'^\D*\D$')
        # One tagger is shared by all rows instead of building one per row;
        # nothing is built when there are no rows to tag.
        tagger = Hannanum() if total else None
        for i in range(total):
            nouns = tagger.nouns(food_detail_df.foodRecipe[i])
            counts = Counter(nouns)
            # Drop every occurrence of a word seen fewer than three times
            # (list.remove would only delete the first occurrence).
            only_word = [
                word for word in nouns
                if counts[word] >= 3 and digit_free.match(word)
            ]

            food_detail_df.loc[i, 'foodRecipeNoun'] = str(only_word)
            if (i % 5) == 0:
                print(round(i / total * 100, 2), ' perent done')
        print('Complete')
Exemplo n.º 2
0
    def NLP(self, food_detail_df):
        """Extract the recurring, digit-free nouns of every recipe text.

        Each ``foodRecipe`` text is split into nouns with Hannanum.  Chat
        filler is stripped from every noun, then the nouns that occur at
        least three times in that text and contain no digit are stored, as
        the ``str()`` of a list, in the ``foodRecipeNoun`` column.  Progress
        is printed to stdout every fifth row.

        Args:
            food_detail_df: DataFrame with a ``foodRecipe`` column of text.
                Rows are addressed by the labels ``0 .. len - 1``.  Modified
                in place.

        Returns:
            None.
        """
        print('[noun extract start]')

        food_detail_df['foodRecipeNoun'] = ''
        total = len(food_detail_df)
        # Matches non-empty words made only of non-digit characters.
        digit_free = re.compile(r'^\D*\D$')
        # One tagger is shared by all rows instead of building one per row;
        # nothing is built when there are no rows to tag.
        tagger = Hannanum() if total else None
        for i in range(total):
            doc = food_detail_df.foodRecipe[i]
            # Build a new list: rebinding the loop variable would discard the
            # cleaned words.  Removes Korean laughing/crying jamo and the
            # literal two-character sequence backslash + "n".
            nouns = [
                word.replace('ㅎ', '').replace('ㅋ', '').replace(
                    'ㅜㅜ', '').replace('ㅠㅠ', '').replace('\\n', '')
                for word in tagger.nouns(doc)
            ]

            counts = Counter(nouns)
            # Drop every occurrence of a word seen fewer than three times
            # (list.remove would only delete the first occurrence).  Words
            # emptied by the clean-up are rejected by the pattern.
            only_word = [
                word for word in nouns
                if counts[word] >= 3 and digit_free.match(word)
            ]

            food_detail_df.loc[i, 'foodRecipeNoun'] = str(only_word)
            if (i % 5) == 0:
                print(round(i / total * 100, 2), ' perent done')
        print('Complete')