def fetchLinks(self, url):
    """Fetch the HTML page at *url* and return its outgoing links.

    The page is rejected (returning None) when its Content-Type is not
    text/html, when fetching or parsing fails, or when no valid links
    are found.

    :param url: absolute URL of the page to scan.
    :return: list of validated absolute sub-URLs, or None.
    """
    try:
        response = urlopen(url)
        try:
            # Reject non-HTML resources (images, binaries, ...) since
            # they cannot be parsed for anchor tags. A missing
            # Content-Type header is treated as non-HTML rather than
            # crashing on None.strip().
            contenttype = response.info()['Content-Type']
            contenttype = (contenttype or '').strip().lower()
            if not contenttype.startswith('text/html'):
                self.log.debug('rejected = ' + url + ' Content-Type = ' +
                               contenttype)
                return None
            dom = parse(response).getroot()
        finally:
            # Always release the underlying socket.
            response.close()
    except urllib.error.HTTPError as e1:
        # warn() is a deprecated alias of warning()
        self.log.warning(''.join(['HTTP error for ', url]))
        self.log.debug(e1, exc_info=True)
        return None
    except Exception as e2:
        self.log.warning('cannot parse ' + url)
        self.log.debug(e2, exc_info=True)
        return None
    if dom is None:
        return None
    # cssselect always returns a list (possibly empty), never None.
    linksList = []
    for link in dom.cssselect('a'):
        href = link.get('href')
        if href is None:
            continue
        suburl = URLValidator.formValidURL(url, href)
        if suburl is not None:
            linksList.append(suburl)
        else:
            self.log.debug('rejected href = ' + href)
    return linksList if linksList else None
 def fetchLinks(self, url):
     """Fetch *url*, parse it as HTML and collect validated links.

     NOTE(review): this is a duplicate definition of fetchLinks; being
     defined second, it is the one that takes effect. The earlier copy
     should be removed.

     :param url: absolute URL of the page to scan.
     :return: list of valid absolute sub-URLs, or None when the page is
         rejected, unfetchable, unparsable, or link-free.
     """
     dom = None
     try:
         response = urlopen(url)
         try:
             ctype = response.info()['Content-Type']
             # Guard against a missing header; only HTML is parseable
             # for links, so everything else is rejected.
             ctype = (ctype or '').strip().lower()
             if not ctype.startswith('text/html'):
                 self.log.debug('rejected = ' + url + ' Content-Type = ' + ctype)
                 return None
             dom = parse(response).getroot()
         finally:
             # Close the response to avoid leaking the connection.
             response.close()
     except urllib.error.HTTPError as e1:
         # logging.warn is deprecated in favour of warning
         self.log.warning(''.join(['HTTP error for ', url]))
         self.log.debug(e1, exc_info=True)
         return None
     except Exception as e2:
         self.log.warning('cannot parse ' + url)
         self.log.debug(e2, exc_info=True)
         return None
     if dom is None:
         return None
     collected = []
     # cssselect returns a (possibly empty) list, never None.
     for anchor in dom.cssselect('a'):
         href = anchor.get('href')
         if href is None:
             continue
         suburl = URLValidator.formValidURL(url, href)
         if suburl is not None:
             collected.append(suburl)
         else:
             self.log.debug('rejected href = ' + href)
     return collected if collected else None