def getAppcoda():
    """Scrape the latest iOS tutorials from appcoda.com.

    Returns a list of dataModel objects (title, article url, image url,
    "swift"). Each model is printed and best-effort persisted via
    updateToInfo(); persistence failures skip the entry.
    """
    dataArray = []
    imageArr = []
    url = "https://www.appcoda.com/tutorials/ios/"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    # Thumbnail URL is embedded in an inline style attribute; extract it
    # with a regex. Compile once, outside the loop.
    pattern = re.compile(r'https://.*g')
    for thumb in soup.find_all("div", class_="post-thumbnail"):
        style = thumb.find("div").get("style")
        matches = pattern.findall(style)
        # First match, or "" when nothing matched. For the single-match case
        # this equals the original's str(list)[2:-2] slicing trick, but it is
        # safe when the regex finds nothing.
        imageArr.append(matches[0] if matches else "")
    for i, post in enumerate(soup.find_all("div", class_="post-content")):
        heading = post.find("h2")
        title = heading.get_text()
        eachUrl = heading.find("a").get("href")
        # Bug fix: the original appended the raw str(list) image value to
        # dataArray while persisting the stripped value — build one model
        # and use it for both.
        model = dataModel(title, eachUrl, imageArr[i], "swift")
        dataArray.append(model)
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
    return dataArray
def getHackeingWithSwift():
    """Scrape the newest Swift articles from raywenderlich.com (up to 10).

    NOTE(review): a second function with this exact name is defined later in
    this file (the hackingwithswift.com scraper) and shadows this one at
    import time — confirm which is intended and rename one of them.

    Returns a list of dataModel objects; each is printed and best-effort
    persisted via updateToInfo().
    """
    dataArray = []
    websiteUrl = "https://www.raywenderlich.com"
    url = "https://www.raywenderlich.com/library/search?section_id=49&domain_ids%5B%5D=1&content_types%5B%5D=article&category_ids%5B%5D=156&category_ids%5B%5D=159&category_ids%5B%5D=157&category_ids%5B%5D=151&category_ids%5B%5D=145&category_ids%5B%5D=161&category_ids%5B%5D=143&category_ids%5B%5D=147&category_ids%5B%5D=155&category_ids%5B%5D=144&category_ids%5B%5D=158&category_ids%5B%5D=148&category_ids%5B%5D=150&category_ids%5B%5D=152&category_ids%5B%5D=160&category_ids%5B%5D=149&category_ids%5B%5D=153&category_ids%5B%5D=154&category_ids%5B%5D=146&sort_order=released_at&page=1"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    titleArray = [
        span.getText()
        for span in soup.find_all("span", class_="c-tutorial-item__title")
    ]
    # Robustness fix: anchors without an href made "websiteUrl + None" raise
    # TypeError in the original; skip them.
    urlArray = [
        websiteUrl + a.get("href")
        for a in soup.find_all("a")
        if a.get("href")
    ]
    imageurlArray = [
        img.get("src")
        for img in soup.find_all(class_="c-tutorial-item__art-image--primary")
    ]
    # Robustness fix: the original hard-coded range(0, 10) and raised
    # IndexError whenever fewer than 10 results came back.
    count = min(10, len(titleArray), len(urlArray), len(imageurlArray))
    for index in range(count):
        model = dataModel(titleArray[index], urlArray[index],
                          imageurlArray[index], "swift")
        dataArray.append(model)
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue
    return dataArray
def getAppso():
    """Scrape app-recommendation articles from ifanr.com/app.

    Each article is wrapped in a dataModel, persisted best-effort via
    updateToInfo(), and printed. Returns None (results are only persisted).
    """
    url = "https://www.ifanr.com/app"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    div_set = soup.find_all("div", class_="article-info")
    img_set = soup.find_all("a", class_="article-link cover-block")
    # Cover image URL is embedded in the inline style; the pattern stops at
    # "260" (part of the CDN sizing suffix). Compile once, outside the loop.
    pattern = re.compile(r'https://.*260')
    imageArray = []
    for cover in img_set:
        matches = pattern.findall(str(cover['style']))
        # First match or "" — equivalent to the original str(list)[2:-2]
        # slicing for the single-match case, but safe when nothing matches.
        imageArray.append(matches[0] if matches else "")
    # Articles and covers come from parallel node lists paired by index —
    # assumes one cover anchor per article-info div; TODO confirm.
    for i, info in enumerate(div_set):
        link = info.find("h3").find("a")
        title = link.get_text()
        eachUrl = link.get("href")
        model = dataModel(title, eachUrl, imageArray[i], 'AppSolution')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getSouhuArticles():
    """Scrape tech news items from it.sohu.com channel 882.

    Items whose class list contains "txt" are text-only and get None as the
    image URL. Each item is persisted best-effort via updateToInfo() and
    printed. Returns None.
    """
    imageUrlArray = []
    url = "http://it.sohu.com/882?spm=smpc.ch30.fd-ctag.24.1556018818504jdhz"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    for i, item in enumerate(soup.find_all(attrs={"data-role": "news-item"})):
        # Plain substring test replaces the original's per-iteration regex —
        # re.findall(r'txt', s) is truthy iff 'txt' occurs in s.
        if 'txt' in str(item['class']):
            # Text-only entry: no thumbnail.
            imageUrlArray.append(None)
        else:
            imageUrlArray.append("https:" + item.find("img").get("src"))
        link = item.find("h4").find("a")
        title = link.get_text().strip()
        # hrefs are protocol-relative on this page.
        eachUrl = "https:" + link.get("href")
        model = dataModel(title, eachUrl, imageUrlArray[i],
                          'TechnologyArticles')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getSwiftOrg():
    """Scrape recent posts from the swift.org blog.

    Returns a list of dataModel objects (title, post url, no image, "swift").
    Each model is printed and best-effort persisted via updateToInfo().
    """
    dataArray = []
    url = "https://swift.org/blog/"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    for heading in soup.find_all("h1", class_="title"):
        title = heading.get_text()
        # Bug fix: the original called heading.get_text("href"), which uses
        # "href" as a *separator* string and never reads the link target.
        # Read the anchor's href attribute instead. swift.org post hrefs are
        # site-absolute (e.g. "/blog/...") so join against the site root —
        # TODO confirm against the live markup.
        anchor = heading.find("a")
        if anchor is None or anchor.get("href") is None:
            continue
        eachUrl = "https://swift.org" + anchor.get("href")
        # The original passed None to dataArray's model and "" to the
        # persisted one; unified on "" (the value that was persisted).
        model = dataModel(title, eachUrl, "", "swift")
        dataArray.append(model)
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue
    return dataArray
def getHackeingWithSwift():
    """Scrape articles from hackingwithswift.com/articles.

    NOTE(review): this re-defines getHackeingWithSwift from earlier in the
    file (the raywenderlich scraper) and shadows it at import time — confirm
    which is intended and rename one of the two.

    Returns a list of dataModel objects; each is printed and best-effort
    persisted via updateToInfo().
    """
    dataArray = []
    base = "https://www.hackingwithswift.com"
    result = requests.get(base + "/articles")
    soup = BeautifulSoup(result.text, "html.parser")
    for anchor in soup.find_all("a"):
        h3 = anchor.find("h3")
        # Only anchors wrapping an <h3> are article cards.
        if h3 is None:
            continue
        title = h3.get_text()
        eachUrl = base + anchor.get("href")
        # Robustness fix: an article anchor without an <img> raised
        # AttributeError in the original; fall back to an empty image URL.
        img = anchor.find("img")
        imageUrl = base + img.get("src") if img is not None else ""
        model = dataModel(title, eachUrl, imageUrl, "swift")
        dataArray.append(model)
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue
    return dataArray
def getSspai():
    """Fetch the app-recommendation feed from the sspai.com JSON API.

    Each entry is wrapped in a dataModel, persisted best-effort via
    updateToInfo(), and printed. Returns None.
    """
    url = "https://sspai.com/api/v1/articles?offset=0&limit=20&has_tag=1&tag=%E5%BA%94%E7%94%A8%E6%8E%A8%E8%8D%90&include_total=false&type=recommend_to_home"
    # The original bound the response to `list`, shadowing the builtin.
    payload = requests.get(url).json()
    # Iterate entries directly instead of range(len(...)) indexing.
    for entry in payload['list']:
        title = entry['title']
        eachUrl = "https://sspai.com/post/" + str(entry['id'])
        imageUrl = "https://cdn.sspai.com/" + entry['banner']
        model = dataModel(title, eachUrl, imageUrl, 'ResourceRecommend')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getZuori():
    """Scrape security articles from anquanke.com's front page.

    The matched div is the left (image) column of each row; the title lives
    in the *sibling* column, hence the find_next_sibling() hop. Each item is
    persisted best-effort via updateToInfo() and printed. Returns None.
    """
    url = "https://www.anquanke.com/"
    data = requests.get(url)
    soup = BeautifulSoup(data.text, "html.parser")
    div_set = soup.find_all(
        "div",
        class_="col col-9 col-xs-9 col-sm-8 col-md-8 col-lg-6 col-xl-6 common-item-left")
    for div in div_set:
        # Title sits in the neighbouring column of this row.
        title = div.find_next_sibling().find("div").find("div").find("a").get_text()
        eachUrl = "https://www.anquanke.com" + div.find("a").get("href")
        # Thumbnails are lazy-loaded; the real URL is in data-original.
        imageUrl = div.find("a").find("div").find("div").find("img").get("data-original")
        model = dataModel(title, eachUrl, imageUrl, 'NetworkSecurity')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getNshipster():
    """Scrape recent article links from nshipster.com.

    Each article is wrapped in a dataModel (no image), printed, and
    best-effort persisted via updateToInfo(). Returns None.
    """
    url = "https://nshipster.com"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    for li in soup.find("section", id="recent").find_all("li"):
        link = li.find("a")
        title = link.get_text()
        # hrefs are site-relative; prefix with the site root.
        eachUrl = url + link.get("href")
        # (The original also read the <p> summary text but never used it —
        # dead code removed.)
        model = dataModel(title, eachUrl, "", "swift")
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getPythonGithubTrending():
    """Scrape today's trending Python repositories from GitHub.

    The model title is "<description> <owner/name>" when the repo has a
    description paragraph, otherwise just "<owner/name>". Each entry is
    printed and best-effort persisted via updateToInfo(). Returns None.
    """
    url = "https://github.com/trending/python?since=daily"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")
    for project in soup.findAll("article", class_="Box-row"):
        # The repo link href is "/owner/name"; drop the leading slash.
        title = project.find('h1').find('a').get("href")[1:]
        project_url = "https://github.com/" + title
        # Some repos have no description paragraph; the original covered
        # that with a bare except — test explicitly instead.
        desc_node = project.find('p', class_="col-9 text-gray my-1 pr-4")
        if desc_node is not None:
            # .strip() replaces the original .rstrip().lstrip() pair.
            describe = desc_node.get_text().strip() + " " + title
        else:
            describe = title
        model = dataModel(describe, project_url, "", "python")
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getPconline():
    """Scrape phone review articles from mobile.pconline.com.cn.

    Each article is persisted best-effort via updateToInfo() and printed.
    Returns None.
    """
    url = "https://mobile.pconline.com.cn/pry/"
    result = requests.get(url)
    # The site serves GBK, not UTF-8.
    result.encoding = "gbk"
    soup = BeautifulSoup(result.text, "html.parser")
    div_set = soup.find("div", class_="art-list art-list-cut").find_all(
        "a", class_="img-area")
    for div in div_set:
        img = div.find("img")
        title = img.get("alt")
        eachUrl = div.get("href")
        # "#src" is presumably the site's lazy-load attribute — TODO confirm.
        # Robustness fix: fall back to the plain "src" attribute so a missing
        # "#src" doesn't make "https:" + None raise TypeError (mirrors the
        # ".src"/"src" fallback in getZol).
        src = img.get("#src") or img.get("src")
        imageUrl = "https:" + src if src else None
        model = dataModel(title, eachUrl, imageUrl, 'Phone')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getZol():
    """Scrape network-security news from safe.zol.com.cn.

    Each item is persisted best-effort via updateToInfo() and printed.
    Returns None.
    """
    url = "http://safe.zol.com.cn/more/2_1628.shtml"
    data = requests.get(url)
    soup = BeautifulSoup(data.text, "html.parser")
    for div in soup.find_all("div", class_="info-mod clearfix"):
        # Hoist the repeated div.find("a").find("img") lookups.
        anchor = div.find("a")
        img = anchor.find("img")
        title = img.get("alt")
        eachUrl = anchor.get("href")
        # Lazy-loaded images carry the real URL in ".src"; fall back to
        # "src" (same check as the original, without the double lookup).
        imageUrl = img.get(".src")
        if imageUrl is None:
            imageUrl = img.get("src")
        model = dataModel(title, eachUrl, imageUrl, 'NetworkSecurity')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getPcbeta():
    """Scrape news from pcbeta.com (GBK-encoded; needs a browser User-Agent).

    Thumbnails whose <img> has no title attribute are skipped. Each item is
    persisted best-effort via updateToInfo() and printed. Returns None.
    """
    url = "http://www.pcbeta.com/news/"
    # The site rejects urllib's default User-Agent; spoof a browser.
    headers = ("User-Agent",
               "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45")
    opener = urllib.request.build_opener()
    opener.addheaders = [headers]
    data = opener.open(url).read().decode('gbk')
    soup = BeautifulSoup(data, "html.parser")
    for thumb in soup.find_all("a", class_="thumb"):
        img = thumb.find("img")
        title = img.get("title")
        # Guard clause replaces the original's enclosing if: entries without
        # a title attribute are skipped.
        if not title:
            continue
        eachUrl = thumb.get("href")
        imageUrl = img.get("src")
        model = dataModel(title, eachUrl, imageUrl, 'Computer')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue
def getIgao7():
    """Scrape phone news from m.igao7.com.

    Thumbnails and titles come from parallel node lists paired by index —
    assumes the page emits one "pic" div per "name clr" div; TODO confirm.
    Each item is persisted best-effort via updateToInfo() and printed.
    Returns None.
    """
    url = "http://m.igao7.com/category/all"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    imageurlArray = [
        pic.find("img").get("src")
        for pic in soup.find_all("div", class_="pic")
    ]
    for i, name in enumerate(soup.find_all("div", class_="name clr")):
        title = name.find("span", class_="hd").get_text()
        # The wrapping anchor is the name div's parent.
        eachUrl = name.parent.get("href")
        model = dataModel(title, eachUrl, imageurlArray[i], 'Phone')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip entries that fail to save.
            continue