def gen_config_from_ui(self):
    """Build an ``utils.AppConfig`` plus a keywords list from the UI widgets.

    Returns:
        tuple: ``(config, keywords_list)``.  ``keywords_list`` is read from
        a file (one keyword per line) when the "from file" checkbox is set,
        otherwise parsed from the inline keywords field.
    """
    config = utils.AppConfig()

    # Driver
    if self.radioButton_chrome_headless.isChecked():
        config.driver = "chrome_headless"

    # Output directory
    config.output_dir = self.lineEdit_output.text()

    # Switches
    config.face_only = self.checkBox_face_only.isChecked()
    config.safe_mode = self.checkBox_safe_mode.isChecked()

    # Numbers
    config.max_number = self.spinBox_max_number.value()
    config.num_threads = self.spinBox_num_threads.value()

    # Keywords list
    if self.checkBox_from_file.isChecked():
        # NOTE(review): fallback stays an empty *string* (as in the original
        # code) rather than an empty list when no path is given — callers
        # only appear to iterate it, so both behave as "no keywords".
        keywords_list = ''
        str_path = self.lineEdit_path2file.text()
        if not str_path:
            # No file path supplied: tell the user and close the window.
            # Execution still falls through and returns the empty value.
            messagebox.showinfo("提示", "请完成文件读入功能")
            self.close()
        else:
            keywords_list = utils.gen_keywords_list_from_file(str_path)
    else:
        str_keywords = self.lineEdit_keywords.text()
        keywords_list = utils.gen_keywords_list_from_str(str_keywords)
    return config, keywords_list
def gen_config_from_ui(self):
    """Build an ``utils.AppConfig`` plus a keywords list from the UI widgets.

    Reads the engine/driver radio buttons, output path, boolean switches,
    numeric spin boxes and proxy settings into ``config``.  Keywords come
    either from a file (one per line) or from the comma-separated inline
    field.

    Returns:
        tuple: ``(config, keywords_list)``.
    """
    config = utils.AppConfig()

    # Engine  (no branch taken leaves AppConfig's default in place)
    if self.radioButton_google.isChecked():
        config.engine = "Google"
    elif self.radioButton_bing.isChecked():
        config.engine = "Bing"
    elif self.radioButton_baidu.isChecked():
        config.engine = "Baidu"

    # Driver
    if self.radioButton_chrome_headless.isChecked():
        config.driver = "chrome_headless"
    elif self.radioButton_chrome.isChecked():
        config.driver = "chrome"
    elif self.radioButton_phantomjs.isChecked():
        config.driver = "phantomjs"

    # Output directory
    config.output_dir = self.lineEdit_output.text()

    # Switches
    config.face_only = self.checkBox_face_only.isChecked()
    config.safe_mode = self.checkBox_safe_mode.isChecked()

    # Numbers
    config.max_number = self.spinBox_max_number.value()
    config.num_threads = self.spinBox_num_threads.value()

    # Proxy: explicit None when disabled so stale values never leak through.
    if self.checkBox_proxy.isChecked():
        if self.radioButton_http.isChecked():
            config.proxy_type = "http"
        elif self.radioButton_socks5.isChecked():
            config.proxy_type = "socks5"
        config.proxy = self.lineEdit_proxy.text()
    else:
        config.proxy_type = None
        config.proxy = None

    # Keywords list
    if self.checkBox_from_file.isChecked():
        str_path = self.lineEdit_path2file.text()
        keywords_list = utils.gen_keywords_list_from_file(str_path)
    else:
        str_keywords = self.lineEdit_keywords.text()
        keywords_list = utils.gen_keywords_list_from_str(str_keywords, ",")
    return config, keywords_list
def main(argv):
    """CLI entry point: parse arguments and download images per keyword.

    ``keywords`` may be given inline (comma separated) or as a path to a
    line-separated file; when a file is used, keywords that already have a
    directory under the output path are skipped.  Each remaining keyword is
    handed to ``run_keyword`` in parallel via joblib.

    Args:
        argv: Argument list (excluding the program name), e.g. ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Image Downloader")
    parser.add_argument(
        "keywords",
        type=str,
        help='Keywords to search. ("in quotes") or in file if provided path '
             'will be prioritized (line separated)')
    parser.add_argument("--engine", "-e",
                        type=str, default="Google",
                        help="Image search engine.",
                        choices=["Google", "Bing", "Baidu"])
    # BUG FIX: help text was a copy-paste of --engine's ("Image search engine.").
    parser.add_argument("--driver", "-d",
                        type=str, default="chrome_headless",
                        help="Web driver used to crawl the search engine.",
                        choices=["chrome_headless", "chrome", "phantomjs"])
    parser.add_argument("--max-number", "-n",
                        type=int, default=100,
                        help="Max number of images download for the keywords.")
    parser.add_argument("--num-threads", "-j",
                        type=int, default=50,
                        help="Number of threads to concurrently download images.")
    parser.add_argument("--timeout", "-t",
                        type=int, default=20,
                        help="Seconds to timeout when download an image.")
    parser.add_argument("--output", "-o",
                        type=str, default="./download_images",
                        help="Output directory to save downloaded images.")
    parser.add_argument("--safe-mode", "-S",
                        action="store_true", default=False,
                        help="Turn on safe search mode. (Only effective in Google)")
    # BUG FIX: help text was truncated ("Only search for ").
    parser.add_argument("--face-only", "-F",
                        action="store_true", default=False,
                        help="Only search for faces.")
    parser.add_argument("--proxy_http", "-ph",
                        type=str, default=None,
                        help="Set http proxy (e.g. 192.168.0.2:8080)")
    parser.add_argument("--proxy_socks5", "-ps",
                        type=str, default=None,
                        help="Set socks5 proxy (e.g. 192.168.0.2:1080)")
    parser.add_argument("--jobs", "-m",
                        type=int, default=8,
                        help="Set number of processes")
    args = parser.parse_args(args=argv)

    # Resolve proxy settings; http takes precedence when both are supplied.
    proxy_type = None
    proxy = None
    if args.proxy_http is not None:
        proxy_type = "http"
        proxy = args.proxy_http
    elif args.proxy_socks5 is not None:
        proxy_type = "socks5"
        proxy = args.proxy_socks5

    if os.path.exists(args.keywords):
        # Keywords argument is a file path: one keyword per line.
        print(f"File exists: {args.keywords}")
        keywords = utils.gen_keywords_list_from_file(args.keywords)
        keywords = [kw.replace("\n", "") for kw in keywords]
        print(
            f"requested keywords: {keywords} - Total requested: {len(keywords)}"
        )
        # Skip keywords that already have a directory under the output path.
        # BUG FIX: os.listdir raised FileNotFoundError on the first run, when
        # the output directory did not exist yet.
        existing_entries = os.listdir(args.output) if os.path.isdir(args.output) else []
        existing_keywords = [
            scraped for scraped in existing_entries if scraped in keywords
        ]
        print(
            f"Skipping existing keywords: {existing_keywords} - Total skipped: {len(existing_keywords)}"
        )
        # Every item that was requested and is not existing already.
        keywords = [
            keyword for keyword in keywords if keyword not in existing_keywords
        ]
        print(
            f"Scraping following file:{args.keywords}, keywords:{keywords} - started processing: {len(keywords)}"
        )
    else:
        # Inline keywords, comma separated.
        keywords = args.keywords.split(',')
        print(f"Scraping keywords:{keywords}")

    # One job per keyword, up to args.jobs running concurrently.
    Parallel(n_jobs=args.jobs)(
        delayed(run_keyword)(keyword, args, proxy, proxy_type)
        for keyword in keywords)
    print("Finished.")