def read_soup(page):
    ''' Reads a web page and soupifies it '''
    # fetch_url returns (url, html, cookie_jar); only the HTML body is needed,
    # so avoid binding the unused url/cookie_jar locals (consistent with the
    # second read_soup definition in this file).
    html = fetch_url(page)[1]
    # Parse the page
    return bs4.BeautifulSoup(html)
def read_soup(page):
    ''' Reads a web page and soupifies it '''
    # Download the page; fetch_url's payload is the second tuple element.
    response = fetch_url(page)
    markup = response[1]
    # Hand the raw markup to BeautifulSoup for parsing.
    return bs4.BeautifulSoup(markup)
def save_file(filename, url):
    ''' Downloads *url* and writes the payload to *filename*.

    Retries on HTTP errors, sleeping 2 seconds between attempts, and
    raises DREError once MAX_ATTEMPTS is reached.
    '''
    attempt = 1
    while True:
        try:
            # fetch_url returns (final_url, payload, cookie_jar); keep the
            # (possibly redirected) url so retries and messages use it.
            url, data_blob, cookies = fetch_url(url)
            break
        except urllib2.HTTPError:
            # Lazy %-args: the logging module formats only if the record
            # is actually emitted.
            logger.error('Could not read PDF: %s DOC: %s', url, filename)
            attempt += 1
            # '>=' instead of '==' so a MAX_ATTEMPTS of 1 (or any value we
            # have already passed) cannot produce an infinite retry loop.
            if attempt >= MAX_ATTEMPTS:
                raise DREError('Couldn\'t get the PDF: %s' % url)
            logger.debug('Sleeping 2 secs...')
            time.sleep(2)
    # The with-statement closes the file; no explicit close() needed.
    with open(filename, 'wb') as f:
        f.write(data_blob)
def read_bdp_file(self):
    ''' Fetches the BDP source and returns a csv.reader over its
    semicolon-delimited payload. '''
    # Only the payload (second element of fetch_url's result) is used.
    payload = fetch_url(BDP_SOURCE_URL)[1]
    buf = StringIO.StringIO(payload)
    return csv.reader(buf, delimiter=';')
def save_file(filename, url):
    ''' Downloads *url* and writes the payload to *filename*. '''
    # fetch_url returns (url, payload, cookie_jar); only the payload matters.
    # (The unused 'k = 1' leftover from the retrying variant is removed.)
    data_blob = fetch_url(url)[1]
    # The with-statement closes the file; the original's explicit
    # f.close() inside the block was redundant.
    with open(filename, 'wb') as f:
        f.write(data_blob)