def get_article_pages(self, teams_pages):
    """Scrape the image and article URLs from the teams' website pages.

    Args:
        teams_pages (list): the teams' main pages on the One website

    Returns:
        dict: article details (image, team set), keyed by article URL
    """
    team_address_str = ', '.join(teams_pages)
    scrape_to_file('WallaArticlePathes', team_address_str, 'out.json')
    all_teams_article_dict = {}
    with open('out.json', encoding="utf-8", errors='ignore') as json_file:
        data = json.load(json_file)
        for team_article_address in data:
            # Pair each article URL with its image (dropping the first two
            # characters, presumably a protocol-relative '//') and the team
            # it came from. 'team' is a set so that entries for the same
            # article can be merged across teams below.
            zip_iterator = zip(
                team_article_address['articles'],
                [{'image': image[2:],
                  'team': {team_article_address['team']}}
                 for image in team_article_address['images']])
            articles_images_dict = dict(zip_iterator)
            for article in articles_images_dict:
                if article in all_teams_article_dict:
                    # The article was already scraped for another team:
                    # union the team sets instead of overwriting.
                    all_teams_article_dict[article]['team'] |= \
                        articles_images_dict[article]['team']
                else:
                    all_teams_article_dict[article] = \
                        articles_images_dict[article]
    return all_teams_article_dict
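# A minimal sketch of the merge above, under the assumption that 'out.json'
# holds one record per team shaped like {"team", "articles", "images"} with
# the two lists aligned by index (the sample values below are hypothetical).
# An article shared by two teams keeps a single entry whose 'team' set holds
# both names:
#
#     data = [
#         {"team": "Maccabi Haifa", "articles": ["/a/1"],
#          "images": ["//img.one.co.il/1.jpg"]},
#         {"team": "Hapoel Tel Aviv", "articles": ["/a/1"],
#          "images": ["//img.one.co.il/1.jpg"]},
#     ]
#     # -> {"/a/1": {"image": "img.one.co.il/1.jpg",
#     #              "team": {"Maccabi Haifa", "Hapoel Tel Aviv"}}}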
def get_full_articles(self, article_urls):
    """Scrape full article details from the URLs in article_urls.

    Args:
        article_urls (dict): article details keyed by URL, as returned
            by get_article_pages

    Returns:
        list: list of ArticleModel objects.
    """
    if not article_urls:
        return []
    articles = []
    articles_teams_dictionary = {}
    articles_address_str = ', '.join(article_urls)
    scrape_to_file('ArticleOne', articles_address_str, 'articles.json')
    with open('articles.json', encoding="utf-8", errors='ignore') as json_file:
        try:
            data = json.load(json_file)
        except json.JSONDecodeError:
            print("Something went wrong with an article at One")
            return []
    for article in data:
        try:
            # Attach the image scraped earlier and remember which teams
            # the article belongs to; the team relations are wired up
            # after the bulk save below.
            article['article_image'] = \
                article_urls[article['url']].get('image')
            article['article_teams'] = []
            articles_teams_dictionary[article['url']] = list(
                article_urls[article['url']].get('team'))
            article_object = ArticleModel(**article)
            articles.append(article_object)
        except Exception:
            # Malformed record: blank its fields and skip it (it is
            # never appended to the result list).
            article['article_image'] = ''
            article['team'] = ''
    if articles:
        ArticleModel.save_to_db_bulk(articles)
        urls = [article.url for article in articles]
        print(urls)
        for url in urls:
            # Re-load each saved article and attach its team relations.
            article_obj = ArticleModel.find_by_articles_url_one(url)
            article_obj.article_teams = TeamModel.find_by_teams_names(
                articles_teams_dictionary[url])
            article_obj.save_to_db()
    return articles
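# A hedged usage sketch chaining the two steps; the owning scraper class
# name and the team-page URL are assumptions, not part of this module:
#
#     scraper = OneScraper()  # hypothetical class that defines these methods
#     pages = scraper.get_article_pages(['https://www.one.co.il/team/...'])
#     saved = scraper.get_full_articles(pages)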