Пример #1
0
 def extract(self):
     """Crawl every configured proxy source and harvest proxies.

     For each named setting, walks the parallel lists ``base_url`` /
     ``selector`` / ``pattern`` in lockstep; when the selector or pattern
     list is shorter than the url list, its last entry is reused for the
     remaining urls.  Each page is fetched (throttled by
     ``self.interval``), parsed with htmldom, and handed to
     ``self._find_proxy`` on a worker thread.  All workers are joined
     before returning.

     :raises SettingReader.ProxySettingError: logged (not propagated)
             when a selector matches nothing on a page.
     """
     for setting_name, setting in self.__setting.items():
         for base_url, selector, pattern in itertools.zip_longest(setting["base_url"], setting["selector"],
                                                                  setting["pattern"]):
             if not base_url:
                 continue
             # zip_longest pads the shorter lists with None: fall back
             # to the last configured pattern/selector in that case.
             if pattern is None:
                 pattern = setting["pattern"][-1]
             if selector is None:
                 selector = setting["selector"][-1]
             time.sleep(self.interval)  # polite crawl delay between requests
             self.__web.set_target(base_url)
             try:
                 html = self.__web.start_request()
                 dom = htmldom.HtmlDom().createDom(html)
                 table = dom.find(selector)
                 if table.len == 0:
                     raise SettingReader.ProxySettingError("invalid selector!", selector)
                 td = threading.Thread(target=self._find_proxy, args=(table, pattern))
                 td.start()
                 self.__tasks.append(td)
             except Exception as err:
                 # Best-effort crawl: one broken source must not stop the rest.
                 logger.log(logger.BASIC, str(err))
     for task in self.__tasks:
         task.join()
Пример #2
0
 def __extract_baseurl_mode2(self, base_url):
     """Expand a mode-2 base-url setting into concrete page urls.

     Walks ``base_url`` / ``selector`` / ``pattern`` / ``container_attr``
     in lockstep (shorter lists are padded by reusing their last entry),
     fetches each page, selects the link container, and for every index
     in ``base_url["sequence"]`` extracts a url either from the given
     container attribute or from group 1 of the regex pattern.

     :param base_url: setting dict holding the parallel configuration lists.
     :return: extracted base urls list.
     """
     urls = []
     for item_url, selector, pattern, attr in itertools.zip_longest(base_url["base_url"],
                                                                    base_url["selector"],
                                                                    base_url["pattern"],
                                                                    base_url["container_attr"]):
         if not item_url:
             continue
         # None means the list ran short: reuse its last configured value.
         if pattern is None:
             pattern = base_url["pattern"][-1]
         if selector is None:
             selector = base_url["selector"][-1]
         if attr is None:
             attr = base_url["container_attr"][-1]
         time.sleep(self.interval)  # polite crawl delay between requests
         self.__web.set_target(item_url)
         try:
             html = self.__web.start_request()
             dom = htmldom.HtmlDom().createDom(html)
             table = dom.find(selector)
             for i in base_url["sequence"]:
                 if attr:
                     urls.append(absolute_url(item_url, table[int(i)].attr(attr)))
                 else:
                     url_sm = re.search(pattern, table[int(i)].html())
                     urls.append(absolute_url(item_url, url_sm.group(1)))
         except Exception as err:
             # Best-effort: a broken page only costs its own urls.
             logger.log(logger.BASIC, str(err))
     return urls
Пример #3
0
 def check(self):
     """Measure delay for every candidate and keep only reachable ones.

     Rewrites ``self.candidates``: a candidate survives only when
     ``self.get_delay_time`` returns a truthy delay, which is stored
     (as float) under its ``"responsetime"`` key.
     """
     alive = []
     for item in self.candidates:
         # Per-item try: a failure for one candidate must not abort the
         # loop and silently discard all remaining candidates.
         try:
             timeout = self.get_delay_time(item["ip"])
             if timeout:
                 item["responsetime"] = float(timeout)
                 alive.append(item)
                 logger.log(logger.DETAIL,
                            "got a candidate ip:{0} delay:{1}s".format(item["ip"], item["responsetime"]))
         except Exception as err:
             logger.log(logger.BASIC, str(err))
     self.candidates = alive
Пример #4
0
 def get_candidate(self, host_index):
     """Query the test host with every key and collect unique results.

     Each successful response is parsed via ``self.parse_response``;
     a non-empty result not yet present in ``self.candidates`` is
     appended to it.

     :param host_index: index into ``self._host`` / ``self._test_conf``.
     """
     client = webclient.WebClient()
     conf = self._test_conf[host_index]
     host_url, _key = self._host[host_index]
     client.set_target(host_url, timeout=self.timeout)
     for probe in self.key_list:
         response = client.start_request(conf[0], conf[1], probe)
         if not response:
             continue
         result = self.parse_response(response, host_index)
         if result != {} and result not in self.candidates:
             self.candidates.append(result)
             logger.log(logger.DETAIL,
                        "got an alpha ip:{0}".format(result["ip"]))
Пример #5
0
 def hunt(self, host_index):
     """
     host_index must be self.super_ping or self.chinaz.
     :param host_index:
     :return: a list with checked ips.
     """
     client = webclient.WebClient()
     host_url, key = self._host[host_index]
     client.set_target(host_url, timeout=self.timeout)
     page = client.start_request(key[1], data=key[0].format(self.target))
     try:
         document = htmldom.HtmlDom().createDom(page)
         # The interesting payload is embedded in inline <script> tags.
         for node in document.find("script"):
             script_text = node.text()
             if self.found_key_data(script_text, host_index):
                 self._parser[host_index](script_text)
     except Exception as err:
         logger.log(logger.BASIC, err)
     self.get_candidate(host_index)
     self.check()
     return self.candidates
Пример #6
0
 def start_request(self, method="GET", data=None):
     """Issue a request to the configured target and return its body.

     :param method: unused by this implementation; kept for interface
                    compatibility with callers.
     :param data: data to send to server (POST payload), or None.
     :return: str body on success, otherwise None.
     """
     body = None
     try:
         self.__client.setup(post=data)
         answer = self.__client.request()
         if answer.code != 200:
             logger.warning(
                 "failed to get response from server! error: {0}\n".format(http.client.responses[answer.code]))
         body = answer.unicode_body()
     except Exception as err:
         # when error happened, it often means there is still a
         # connection, thus just log it and return None.
         logger.log(logger.DETAIL, str(err) + " when request to {0}\n".format(
             self.__target.netloc + self.__target.path))
     return body
Пример #7
0
 def _find_proxy(self, ip_table, pattern):
     """
     Scan each cell of *ip_table* for ip/port pairs and record them.

     :param ip_table: iterable of DOM nodes whose html contains proxies
                      mixed with junk markup.
     :param pattern: regex with two groups (ip, port) used to search
                     the flattened cell text.
     :raises SettingReader.ProxySettingError: if the pattern never
             matched in any cell.
     """
     ip_port_re = re.compile(pattern)  # hoisted: reused for every cell
     valid_pattern = False
     for ip_item in ip_table:
         try:
             # Strip ALL whitespace so ip/port fragments split across
             # markup lines still match.  (The previous code did
             # .replace("\s", ""), which removed the literal two-char
             # sequence backslash-s rather than whitespace.)
             text = re.sub(r"\s+", "", ip_item.html())
             while True:
                 ip_port_sm = ip_port_re.search(text)
                 if not ip_port_sm:
                     break
                 valid_pattern = True
                 if is_validate(*ip_port_sm.groups(), target=self.target, check=self.check):
                     if ip_port_sm.groups() not in self.__proxies:
                         self.__proxies.append(ip_port_sm.groups())
                         print(self.format.format(*ip_port_sm.groups()), end="", file=self.out, flush=True)
                 # Resume scanning just after the port group.
                 text = text[ip_port_sm.end(2):]
         except Exception as err:
             logger.log(logger.BASIC, str(err))
     if valid_pattern is False:
         raise SettingReader.ProxySettingError("invalid pattern!", pattern)
Пример #8
0
 def start_request(self, method="GET", url="", data=""):
     """
     Build and send an HTTP request header-by-header; return the body.

     For GET, *data* is appended verbatim to the path (the caller is
     expected to supply any "?" / query separators — TODO confirm
     against callers) and the request is sent with an empty body;
     for other methods *data* is quoted (keeping '=' and '&') and sent
     as the request body.

     :param method: HTTP verb; "GET" gets special path handling.
     :param url: path suffix appended to the configured target path.
     :param data: data to send to server.
     :return: str body on success, otherwise None.
     """
     url = self.__target.path + url
     raw_data = None
     try:
         self.__client.connect()
         if method == "GET":
             # Fold the payload into the request path and collapse
             # accidental double slashes (path only, so no scheme "//"
             # is affected here).
             data = url + data
             data = str.replace(data, r'//', r'/')
             self.__client.putrequest(method, data)
             data = ""  # GET carries no body
         else:
             self.__client.putrequest(method, url)
         # NOTE: http.client mandates this order — putrequest, headers,
         # endheaders, then the body bytes.
         data = urllib.parse.quote(data, '=&')
         self.__client.putheader('Content-Length', len(data))
         self.__put_def_header()
         self.__put_client_header()
         self.__client.endheaders()
         self.__client.send(data.encode())
         anwser = self.__client.getresponse()
         if anwser.status != 200:
             logger.warning(
                 "failed to get response from server! error: {0}".format(http.client.responses[anwser.status]))
         raw_data = str(anwser.read().decode())
     except Exception as err:
         logger.log(logger.DETAIL, str(err) + " when request to {0}".format(
             self.__target.netloc + self.__target.path))  # when error happened, it often
         # means there is still a connection
         # thus just ignore it.
     self.__client.close()
     return raw_data