        if self.cache_manager:
            # cache the raw results before parsing
            self.cache_manager.cache_results(scrape.parser, scrape.query,
                                             scrape.search_engine_name,
                                             scrape.scrape_method, scrape.page_number)

        if scrape.parser:
            serp = parse_serp(self.config, parser=scrape.parser, scraper=scrape,
                              query=scrape.query)

            if self.scraper_search:
                self.scraper_search.serps.append(serp)

            if self.session:
                self.session.add(serp)
                self.session.commit()

            store_serp_result(serp, self.config)

        print("----------------------------end def run(self):-----------------------------")


if __name__ == '__main__':
    from GoogleScraper.config import get_config
    from GoogleScraper.scrape_jobs import default_scrape_jobs_for_keywords
    from GoogleScraper.utils import get_some_words

    some_words = get_some_words(n=1)
    cfg = get_config()
    scrape_jobs = list(default_scrape_jobs_for_keywords(some_words, ['bing'],
                                                        'http-async', 1))

    manager = AsyncScrapeScheduler(cfg, scrape_jobs)
    manager.run()
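# A standalone sketch built from the same calls as the __main__ block above,
# assuming default_scrape_jobs_for_keywords yields one job per keyword, search
# engine and page; printing the jobs shows what the scheduler would execute.
from GoogleScraper.config import get_config
from GoogleScraper.scrape_jobs import default_scrape_jobs_for_keywords
from GoogleScraper.utils import get_some_words

words = get_some_words(n=2)
jobs = list(default_scrape_jobs_for_keywords(words, ['bing'], 'http-async', 1))
for job in jobs:
    print(job)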
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import random
import pprint

from GoogleScraper.utils import get_some_words

# Fetch a sample vocabulary; random_word() draws from it.
words = get_some_words(n=100)


def random_word():
    return random.choice(words)


pprint.pprint(words)
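# Usage sketch for the helper above; purely illustrative. It assumes it runs
# in the same module as random_word(), so words is already in scope.
for _ in range(5):
    print(random_word())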
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from GoogleScraper import scrape_with_config, GoogleSearchError
from GoogleScraper.utils import get_some_words

keywords = get_some_words(10)

# Write one keyword per line; the scraper reads them back via keyword_file.
with open('keywords.txt', 'wt') as f:
    for word in keywords:
        f.write(word + '\n')

# See the config.cfg file for possible values.
config = {
    'use_own_ip': True,
    'keyword_file': 'keywords.txt',
    'search_engines': ['bing', 'duckduckgo'],
    'num_pages_for_keyword': 2,
    'scrape_method': 'http-async',
    'do_caching': True,
    'output_filename': 'out.csv',
}

try:
    search = scrape_with_config(config)
except GoogleSearchError as e:
    print(e)
else:
    # Let's inspect what we got: print every SERP and its links.
    for serp in search.serps:
        print(serp)
        for link in serp.links:
            print(link)
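# A minimal sketch for inspecting the CSV written by the scrape above. It only
# assumes that out.csv exists and has a header row; the exact columns are
# whatever GoogleScraper's CSV writer emits, so none are hard-coded here.
import csv

with open('out.csv', newline='') as f:
    reader = csv.DictReader(f)  # maps each row to {column: value}
    for row in reader:
        print(row)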