def crawl_mariage():
    """Run ``crawl`` for the ``mariage`` request.

    Imports ``request`` from the ``mariage`` module at call time and passes it
    to ``crawl`` together with one criteria string and a message that carries
    the booking link. Returns ``None``.
    """
    from mariage import request

    criteria = ["Il n'existe plus de plage horaire libre"]
    # The message prefix "링크" is Korean for "link".
    link_message = "링크 : http://www.hauts-de-seine.gouv.fr/booking/create/4485"

    crawl(criteria, request, link_message)
def crawl_94():
    """Run ``crawl`` for the ``_94_`` request with its two criteria strings.

    Imports ``request`` from the ``_94_`` module at call time. Returns ``None``.

    Raises:
        KeyError: if the ``URL_TO_SEND_94`` environment variable is not set.
    """
    from _94_ import request

    # NOTE(review): this value is read but never passed on. The lookup is kept
    # so the function still raises KeyError when the variable is unset --
    # confirm whether it was meant to be forwarded to crawl().
    _url_for_94 = os.environ["URL_TO_SEND_94"]

    crawl(
        [
            # This literal contains a real backslash before the apostrophe.
            "Aucun rendez-vous n\\'est disponible",
            'message:"Error"',
        ],
        request,
    )
def crawl_92():
    """Run ``crawl`` for the ``_92_`` request with its two criteria strings.

    Imports ``request`` from the ``_92_`` module at call time. Returns ``None``.
    """
    from _92_ import request

    criteria = ["Aucun rendez-vous n'est disponible", "vous reconnecter"]
    crawl(criteria, request)
def get_page(self, url):
    """Fetch ``url`` via ``crawl`` through a randomly chosen proxy.

    Args:
        url: Address handed to ``crawl``.

    Returns:
        Whatever ``crawl`` returns, or ``None`` if ``crawl`` raised.

    Raises:
        IndexError: if ``self.proxies`` is empty (the proxy is picked before
            the guarded call, so this is not swallowed).
    """
    proxy = random.choice(self.proxies)
    try:
        return crawl(url, ua_used=self.ua_used, proxy=proxy, method='get')
    except Exception:
        # Best-effort fetch: the caller gets None instead of an exception.
        # logging.exception keeps the ERROR level and the original message
        # text but also records the traceback, so the real cause (which is
        # not necessarily an invalid URL) is no longer lost.
        logging.exception("invalid url, crawl %s failed", url)
        return None
def get_page(self, url):
    """Fetch ``url`` via ``crawl`` and return the result decoded as UTF-8.

    A proxy is picked at random from ``self.proxies``. The value returned by
    ``crawl`` is decoded with ``decode("utf-8", 'ignore')``, so undecodable
    bytes are dropped.

    Args:
        url: Address handed to ``crawl``.

    Returns:
        The decoded text, or ``None`` if fetching or decoding raised.
    """
    chosen_proxy = random.choice(self.proxies)
    try:
        raw = crawl(url, ua_used=self.ua_used, proxy=chosen_proxy, method='get')
        return raw.decode("utf-8", 'ignore')
    except Exception as err:
        # Any failure is logged as a warning and reported to the caller as None.
        logging.warning("crawl %s failed,e=%s", url, err)
        return None
def on_press(self, event):
    """Handle a press event by running the pipeline on the entered text.

    With an empty text control, a notice is printed and nothing else happens.
    Otherwise the control is hidden, a first image is shown, any existing
    ``result.json`` is removed, the ``utils`` steps run in order (extract,
    getPath, crawl, process, writelist) and a final image is shown.

    Args:
        event: The triggering event; not used by the handler.
    """
    entered = self.text_ctrl.GetValue()
    if not entered:
        print("You didn't enter anything!")
        return

    def show_image(image_file):
        # Load the file as a bitmap and place it at the origin at full size.
        bitmap = wx.Image(image_file, wx.BITMAP_TYPE_ANY).ConvertToBitmap()
        wx.StaticBitmap(
            self, -1, bitmap, (0, 0), (bitmap.GetWidth(), bitmap.GetHeight())
        )

    self.text_ctrl.Hide()
    show_image('img/whatever.png')

    # Remove any existing result.json before the utils pipeline runs.
    if os.path.exists("result.json"):
        os.remove("result.json")

    joined_words = ",".join(utils.extract(entered))
    # The output path is obtained before crawling, as in the original order.
    output_path = utils.getPath()
    utils.crawl(joined_words)
    utils.writelist(utils.process(), output_path)

    show_image('img/finish.png')
#!/usr/bin/env python3
import utils
import os

# Remove any existing result.json before running.
if os.path.exists("result.json"):
    os.remove("result.json")

# Each line of input.txt, with trailing whitespace stripped, joined by commas.
with open("input.txt", 'r') as source:
    words = ",".join(entry.rstrip() for entry in source.readlines())

utils.crawl(words)
# The result of utils.process() is handed to utils.writelist with output.txt.
utils.writelist(utils.process(), "output.txt")