Пример #1
0
 def item_list(self, url, **param):
     '''
     Yield a RealEstateObject for every result link, following pagination.

     The first page is requested from ``url``; when keyword parameters are
     given they are url-encoded and appended after a '?'. Every following
     page is requested from the href of the "next" link found on the
     current page, without re-applying ``param``.

     Pages are walked in a loop instead of by recursion, so a long result
     set does not stack one nested generator per page.

     Example request (url plus keyword parameters):

     url               http://www.immobilien.net/treffer.aspx
     eCult             at
     eDB               DBImmobiliensuche
     egeo0             39926
     egeo1             39882
     etype0            352
     etype1            354
     iskauf            true
     kaufpreisbis      500000
     nutzflaechevon    200

     Each yielded object is created with affiliate=self.__name__, the
     absolute item url (self.base_url + href) and the id captured from
     that url by the pattern '/([0-9]+-[0-9]+)/'.
     '''
     # The patterns do not change between pages, so compile them once.
     item_link_id = re.compile('majorContent_sbContainer_ctl00_resCTXContainer_ctl00_LoginView2_Tab3_resultList_LinkLiteral1')
     next_link_id = re.compile('majorContent_sbContainer_ctl00_resCTXContainer_ctl00_LoginView2_Tab3_nextLink')
     item_id_pattern = re.compile('/([0-9]+-[0-9]+)/')

     # Only the first request carries the search parameters.
     page_url = '?'.join([url, urllib.urlencode(param)]) if param else url

     while page_url is not None:
         #download page and cook some soup
         html = downloader.download_page(page_url, config.http_request_retries, config.http_request_sleep).read()
         soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)

         #yield the item links and ids
         for link in soup.findAll('a', id=item_link_id):
             item_url = self.base_url + link['href']
             # NOTE(review): .group(1) raises AttributeError when the url
             # holds no '/<digits>-<digits>/' segment; kept as in the
             # original so callers see the same failure.
             yield RealEstateObject(affiliate=self.__name__, url=item_url, id=item_id_pattern.search(item_url).group(1))

         #if there's a next page, do the same for that one too
         next_page = soup.find('a', id=next_link_id)
         if next_page and next_page.has_key('href'):
             page_url = self.base_url + next_page['href']
         else:
             page_url = None
Пример #2
0
 def item_details(self, url=None, item=None):
     '''
     '''
     if item is None: item=RealEstateObject(affiliate=self.__name__, url=url)
     
     #download page and cook some soup
     html = downloader.download_page(item.url, config.http_request_retries, config.http_request_sleep).read()
     soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
     
     for griditem in soup.findAll('div', {'class': 'detailGrid'}):
         key = griditem.find('dt').text
         value = griditem.find('dd').text
         print key, value
         
     print soup.find('title').text.strip()
     
     print soup.find('div', {'class': 'panel'})
         
     
     return item