# Imports required by the scrapers below.
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup as soup1


def get_player_static(self, url, pozisyon):
    # Fetch the player's performance-data page ("leistungsdaten") and collect
    # the totals row from the first stats-table footer.
    a = 1
    b = []
    print(self.base_url[:-1] + url.replace('profil', 'leistungsdaten'))
    client = Request(self.base_url[:-1] + url.replace('profil', 'leistungsdaten'),
                     headers={"User-Agent": "Mozilla/5.0"})
    page = urlopen(client).read()
    soup = soup1(page, 'html.parser')
    for i in soup.find_all('tfoot'):
        if a == 1:  # only the first tfoot holds the season totals
            c = i.find_all('td')
            c = str(c).split('</td>')
            for j in range(2, len(c) - 1):
                # strip the opening tag, keep only the cell text
                b.append(c[j][c[j].index('>') + 1:])
        a = a + 1
    a = 1
    ozellik = []
    print(pozisyon)
    if pozisyon == "Kaleci":  # "Kaleci" is the site's Turkish label for goalkeeper
        b.insert(2, "-")      # goalkeepers lack some outfield columns; pad with "-"
    else:
        b.append("-")
        b.append("-")
    print(len(b))
    b.clear()  # note: the collected stats are discarded here (likely leftover debug)
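# A hedged alternative for the tfoot parsing above, not what this file does:
# BeautifulSoup can return each cell's text directly, which avoids splitting
# str(c) on '</td>' by hand. Assumes the same "skip the first two cells" rule.
#
# tfoot = soup.find('tfoot')
# b = [td.get_text(strip=True) for td in tfoot.find_all('td')[2:]]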
def get_player(self, url, takim):  # takes a squad URL and a team name
    a = 0
    pozisyon = []
    client = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    page = urlopen(client).read()
    soup = soup1(page, 'html.parser')
    for tr in soup.findAll('tbody'):
        ozellik = ""
        if a == 1:  # the second tbody is the squad table
            for i in tr.find_all('td'):
                b = i.get_text()
                if "€" in b:
                    # the market-value cell closes one player's row: split the
                    # accumulated "+"-joined cells and reorder them
                    ozellik = ozellik + b + "+"
                    c = ozellik.split("+")
                    yas = c[5].split("(")[1][:-1]      # age, from "birth date (age)"
                    dogum_tarihi = c[5].split("(")[0]  # birth date
                    c.pop(1)
                    c.insert(4, dogum_tarihi)
                    c.insert(5, yas)
                    c.pop(6)
                    self.register(c, takim)
                    pozisyon.append(c[3])
                    ozellik = ""
                else:
                    ozellik = ozellik + b + "+"
            sayac = 0    # link counter: each profile link appears twice per player
            p_sayac = 0  # index into the collected positions
            for i in tr.find_all('a'):
                if "profil" in str(i.get('href')):
                    if sayac % 2 == 0:
                        self.get_player_static(i.get('href'), pozisyon[p_sayac])
                        p_sayac = p_sayac + 1
                    sayac = sayac + 1
        a = a + 1
def get_team(self):
    # For each league overview page, collect every squad ("/kader/verein/")
    # link into the matching per-league team list.
    for i in self.my_url:
        url = self.base_url + i
        client = Request(url, headers={"User-Agent": "Mozilla/5.0"})
        page = urlopen(client).read()
        soup = soup1(page, 'html.parser')
        for link in soup.find_all('a'):
            href = link.get('href')
            if "/kader/verein/" in str(href):
                league = i.split("/")[0]
                if league == "premier-league":
                    if href not in self.en_team:
                        self.en_team.append(href)
                elif league == "super-lig":
                    if href not in self.tr_team:
                        self.tr_team.append(href)
                elif league == "laliga":
                    if href not in self.isp_team:
                        self.isp_team.append(href)
                elif league == "serie-a":
                    if href not in self.ital_team:
                        self.ital_team.append(href)
                else:
                    print("invalid league")  # was: print("hatalı")
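# A minimal usage sketch, assuming these methods live on a scraper class
# (hypothetical name TransfermarktScraper, not shown in this excerpt) whose
# __init__ sets base_url, my_url, register(), and the four per-league lists:
#
# scraper = TransfermarktScraper()
# scraper.get_team()  # fills en_team / tr_team / isp_team / ital_team
# for href in scraper.en_team:
#     scraper.get_player(scraper.base_url[:-1] + href, "premier-league")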
def make_soup(url):
    thepage = urlopen(url)
    soupdata = soup1(thepage, 'html.parser')
    return soupdata
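# Usage sketch: unlike the Request calls above, make_soup sends no User-Agent
# header, so sites that block the default urllib client may answer with 403.
#
# soup = make_soup("https://example.com")
# print(soup.title.get_text())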
from selenium import webdriver

csvfile.write(headers)
dircount = 1
url = "https://www.amazon.com/amp/mobiles/1715147/buy"
# change to your download path
browser = webdriver.Chrome(executable_path='/Users/shrinidhipg/Downloads/chromedriver')
while True:
    try:
        url = input()  # read product URLs from stdin until EOF
    except EOFError:
        break
    browser.get(url)
    html = browser.page_source
    soup = soup1(html, "lxml")
    for brk in soup.findAll('br'):
        brk.replace_with('.')  # turn line breaks into sentence breaks
    description = ""
    for detail in soup.findAll("p", {"class": "pdp-product-description-content"}):
        print(detail.text)
        description = description + detail.text + "."
    description = "\"" + description + "\""
    try:
        print(soup.find("h1", {"class": "pdp-title"}).text)
        title = "\"" + soup.find("h1", {"class": "pdp-title"}).text + "\""
    except AttributeError:  # page has no pdp-title heading
        title = "none"
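# Hedged sketch: the loop above builds `title` and `description` but this
# excerpt never writes them out. Given the csvfile.write(headers) call at the
# top, the intended sink is presumably that same CSV (an assumption, not
# shown in the source); a row could be appended like this:
#
# csvfile.write(title + "," + description + "\n")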