示例#1
0
 def requestAPIForURL(amount):
     """Collect article URLs from the laohucaijing.com news-list AJAX endpoint.

     Pages are requested in order starting at page 1. The number of pages is
     ``amount`` divided by 10, rounded up, so whole pages are consumed and the
     result is not trimmed to ``amount`` entries.

     Args:
         amount: Desired number of articles; anything ``float()`` accepts.

     Returns:
         A list of absolute article URLs. Links are de-duplicated within each
         page (so their order inside a page is arbitrary) and links containing
         ``"author_detail"`` are skipped. A zero or negative ``amount`` yields
         an empty list without issuing any request.

     Raises:
         ValueError: If ``amount`` cannot be converted to a float.
     """
     import math

     # Ceiling division. Unlike the previous hand-rolled rounding, this never
     # asks for a page when ``amount`` is negative.
     needPages = int(math.ceil(float(amount) / 10))
     result = []
     for page in range(1, 1 + needPages):
         APIURL = "http://www.laohucaijing.com/laohu_index1/ajax_news_list/?page=%s" % page
         html = crawlUtils.requestJsonWithProxy(APIURL)["html"]
         # Turn every backslash-slash pair into a plain slash before matching.
         # The backslash is written escaped so the literal is a valid string.
         links = laohuCrawlMethod.GET_LINK_REGEX.findall(
             html.replace("\\/", "/"))
         for link in set(links):
             if "author_detail" not in link:
                 result.append("http://www.laohucaijing.com%s" % link)
     return result
示例#2
0
 def requestAPIForURL(amount):
     """Collect article URLs from the inewsweek.chinanews.com paged endpoint.

     Pages are requested in order starting at page 1. The number of pages is
     ``amount`` divided by 20, rounded up, so the result is not trimmed to
     ``amount`` entries.

     Args:
         amount: Desired number of articles; anything ``float()`` accepts.

     Returns:
         A list with the ``url`` field of every entry under ``docs`` in each
         page's response. A page that fails to download or parse contributes
         nothing; the failure is logged and the remaining pages are still
         tried. A zero or negative ``amount`` yields an empty list.

     Raises:
         ValueError: If ``amount`` cannot be converted to a float.
     """
     import logging
     import math

     # Ceiling division; never requests a page for a negative ``amount``.
     needPages = int(math.ceil(float(amount) / 20))
     result = []
     for page in range(1, 1 + needPages):
         APIURL = "http://channel.inewsweek.chinanews.com/u/zk.shtml?pager=%s" % page
         try:
             jsonData = crawlUtils.requestJsonWithProxy(APIURL,
                                                        needCut=True)
             result += [x["url"] for x in jsonData["docs"]]
         except Exception:
             # Best-effort crawl: keep going, but leave a trace instead of
             # hiding the failure. KeyboardInterrupt/SystemExit now propagate.
             logging.getLogger(__name__).exception(
                 "Skipping page %s (%s)", page, APIURL)
     return result
 def requestAPIForURL(amount):
     """Build article-detail API URLs from the chinaipo.com article listing.

     Listing pages are requested in order starting at page 1. The number of
     pages is ``amount`` divided by 10, rounded up, so the result is not
     trimmed to ``amount`` entries.

     Args:
         amount: Desired number of articles; anything ``float()`` accepts.

     Returns:
         A list of detail URLs, one per entry under ``results`` in each page's
         response, built from that entry's ``originalId``. If a page fails
         partway through, the entries handled before the failure are kept,
         the failure is logged, and the remaining pages are still tried.
         A zero or negative ``amount`` yields an empty list.

     Raises:
         ValueError: If ``amount`` cannot be converted to a float.
     """
     import logging
     import math

     # Ceiling division; never requests a page for a negative ``amount``.
     needPages = int(math.ceil(float(amount) / 10))
     result = []
     for page in range(1, 1 + needPages):
         APIURL = "http://api.chinaipo.com/zh-hans/api/articles/?page=%s" % page
         try:
             jsonData = crawlUtils.requestJsonWithProxy(APIURL)
             # Append one by one so that items collected before a malformed
             # entry are not lost.
             for article in jsonData["results"]:
                 originalId = article["originalId"]
                 result.append("http://api.chinaipo.com/zh-hans/api/article/?originalId=%s" % originalId)
         except Exception:
             # Best-effort crawl: keep going, but leave a trace instead of
             # hiding the failure. KeyboardInterrupt/SystemExit now propagate.
             logging.getLogger(__name__).exception(
                 "Skipping rest of page %s (%s)", page, APIURL)
     return result
示例#4
0
 def requestAPIForURL(amount):
     """Collect article URLs from the app.eeo.com.cn "getMoreArticle" endpoint.

     Pages are requested in order starting at page 1. The number of pages is
     ``amount`` divided by 10, rounded up, so the result is not trimmed to
     ``amount`` entries.

     Args:
         amount: Desired number of articles; anything ``float()`` accepts.

     Returns:
         A list with the ``url`` field of every entry under ``article`` in
         each page's response. A page that fails to download or parse
         contributes nothing; the failure is logged and the remaining pages
         are still tried. A zero or negative ``amount`` yields an empty list.

     Raises:
         ValueError: If ``amount`` cannot be converted to a float.
     """
     import logging
     import math

     # Ceiling division; never requests a page for a negative ``amount``.
     needPages = int(math.ceil(float(amount) / 10))
     result = []
     for page in range(1, 1 + needPages):
         # The adjacent literals are joined first, then formatted as one string.
         APIURL = "http://app.eeo.com.cn/?app=wxmember&controller=index&action=getMoreArticle&catid=3572" \
                  "&allcid=358818,358815,358809,358808,358799,358795,358777,358775,358767,358763,358761,358740," \
                  "358732,358730,358718,358712&page=%s" % page
         try:
             jsonData = crawlUtils.requestJsonWithProxy(APIURL, needCut=True)
             result += [x["url"] for x in jsonData["article"]]
         except Exception:
             # Best-effort crawl: keep going, but leave a trace instead of
             # hiding the failure. KeyboardInterrupt/SystemExit now propagate.
             logging.getLogger(__name__).exception(
                 "Skipping page %s (%s)", page, APIURL)
     return result
 def requestAPIForURL(amount):
     """Collect article URLs from the weiyangx.com "load more news" endpoint.

     The home page is fetched first to extract a nonce, which is then sent
     with every POST to the admin-ajax endpoint. The number of POSTs is
     ``amount`` divided by 7, rounded up, so the result is not trimmed to
     ``amount`` entries.

     Args:
         amount: Desired number of articles; anything ``float()`` accepts.

     Returns:
         A list with the ``url`` field of every entry under ``data`` in each
         response. A zero or negative ``amount`` yields an empty list without
         issuing any request.

     Raises:
         ValueError: If ``amount`` cannot be converted to a float.
         IndexError: If no nonce is found in the home page.
     """
     import math

     # Ceiling division; never requests a page for a negative ``amount``.
     needPages = int(math.ceil(float(amount) / 7))
     if needPages <= 0:
         # Nothing to fetch: skip the home-page request and the nonce lookup.
         return []

     homePage = crawlUtils.requestWithProxy("https://www.weiyangx.com")[0]
     nonce = weiyangCrawlMethod.REGEX_FINDING_NONCE.findall(homePage)[0]
     APIURL = "https://www.weiyangx.com/wp-admin/admin-ajax.php"
     result = []
     for page in range(1, 1 + needPages):
         jsonData = crawlUtils.requestJsonWithProxy(
             APIURL,
             needCut=True,
             method="post",
             payload={
                 "action": "home_load_more_news",
                 # NOTE(review): the offset advances by 8 per request while
                 # the page count above assumes 7 items per page, and the
                 # first offset sent is 8 rather than 0. Kept as in the
                 # original; confirm against the site's behaviour.
                 "postOffset": page * 8,
                 "tagId": 0,
                 "_ajax_nonce": nonce
             })
         result += [x["url"] for x in jsonData["data"]]
     return result
示例#6
0
 def requestAPIForURL(amount):
     """Fetch article URLs from the app.ikanchai.com roll endpoint in one request.

     ``amount`` is substituted into the ``pagesize`` query parameter. The
     ``url`` field of every entry under the response's ``data`` key is
     returned as a list, in response order.
     """
     endpoint = "http://app.ikanchai.com/roll.php?do=more&status=1&sort=0&pagesize=%s&page=0" % amount
     payload = crawlUtils.requestJsonWithProxy(endpoint, needCut=True)
     urls = []
     for entry in payload["data"]:
         urls.append(entry["url"])
     return urls