def get_codes(delay=.0):  # 20200810: need delay 4s
    codes = []
    urls = [
        'http://app.finance.ifeng.com/list/stock.php?t=ha&f=symbol&o=asc',
        'http://app.finance.ifeng.com/list/stock.php?t=hs&f=symbol&o=asc',
        'http://app.finance.ifeng.com/list/stock.php?t=sa&f=symbol&o=asc',
        'http://app.finance.ifeng.com/list/stock.php?t=kcb&f=symbol&o=asc',
    ]
    lr = LRequest(delay=delay)
    try:
        for url in urls:
            # logger.info('Load: %s' % url)
            lr.load(url, isdecode=True)
            while 1:
                for ele in lr.xpaths('//div[@class="tab01"]/table//td[1]/a')[:-1]:
                    code = ele.text.strip()
                    if code.isdigit():
                        codes.append(code)

                # "下一页" is the "next page" link; stop paging when it disappears
                next_ele = lr.xpath(u'//a[contains(text(), "下一页")]')
                if next_ele is None:
                    break
                next_url = urljoin(url, next_ele.attrib['href'])
                # logger.info('Load: %s' % next_url)
                lr.load(next_url, isdecode=True)
    except:
        logger.error(traceback.format_exc())

    return codes
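
# A minimal usage sketch (not part of the original module): it assumes the file's
# module-level imports (LRequest, logger, urljoin, traceback) are in place and simply
# drives get_codes() with the 4-second delay the 20200810 note recommends.
if __name__ == '__main__':
    all_codes = get_codes(delay=4.0)
    print('fetched %s codes, e.g. %s' % (len(all_codes), all_codes[:5]))
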
def do(queue, string_proxy):
    lr = LRequest(string_proxy=string_proxy)
    while 1:
        try:
            # e.g. https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=sheets+silk
            category = queue.get(timeout=30)
            url = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%%3Daps&field-keywords=%s' % urllib.quote_plus(category)
            lr.load(url)
            if check_captcha(lr):
                lr.load(url)

            # result-count header looks like "1-48 of 206,341 results for ..."
            ele = lr.xpath('//h2[@id="s-result-count"]')
            result_count = ele.text.split('result', 1)[0].split('of')[-1].strip().replace(',', '')

            f.write('%s\t%s\n' % (category, result_count))
            f.flush()
            print '%s\t%s' % (category, result_count)
        except Empty:
            print 'empty'
            break
        except Exception as e:
            queue.put(category)
            print 'EEEEEEEEE %s' % e
def get_new_stock_code(year=None):
    lr = LRequest()
    stock_codes = []
    if year is None:
        year = str(datetime.date.today().year)
    lr.load('http://quotes.money.163.com/data/ipo/shengou.html?reportdate=%s' % year)
    # lr.loads(BeautifulSoup(lr.body).prettify())

    for ele in lr.xpaths('//table[@id="plate_performance"]/tr/td[3]'):  # codes
        # print ele.text.strip()
        stock_codes.append(ele.text.strip())

    for ele in lr.xpaths('//div[@class="fn_cm_pages"]//a[contains(@href, "page")]')[:-1]:  # pages
        u = urljoin('http://quotes.money.163.com/data/ipo/shengou.html', ele.attrib['href'])
        lr.load(u)
        lr.loads(BeautifulSoup(lr.body, 'lxml').prettify())
        for ce in lr.xpaths('//table[@id="plate_performance"]/tr/td[3]'):  # codes
            # print ce.text.strip()
            stock_codes.append(ce.text.strip())

    return stock_codes
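
# A hedged usage sketch (not from the original file): fetch the current year's and a
# specific year's IPO subscription codes; assumes the module-level imports
# (LRequest, BeautifulSoup, urljoin, datetime) are present.
if __name__ == '__main__':
    print('current year: %s codes' % len(get_new_stock_code()))
    print('2019: %s codes' % len(get_new_stock_code('2019')))
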
class GoogleSearch(object):

    search_url = 'https://www.google.%(tld)s/search?q=%(query)s&hl=%(lang)s&filter=%(filter)d&num=%(num)d&start=%(start)s&btnG=Google+Search'

    def __init__(self, query, *args, **kwargs):
        self.query = query
        self._tld = kwargs.get('tld', 'com')
        self._filter = kwargs.get('filter', 0)
        self._lang = kwargs.get('lang', 'en')
        self._num = kwargs.get('num', 100)
        self._page = kwargs.get('page', 0)
        timeout = kwargs.get('timeout', 90)
        string_proxy = kwargs.get('string_proxy', None)
        self.lr = LRequest(timeout=timeout, string_proxy=string_proxy,
                           handers=[GoogleHTTPErrorProcessor(), ])

    @property
    def page(self):
        return self._page

    @page.setter
    def page(self, value):
        self._page = value

    def _get_result(self):
        safe_url = self.search_url % {'query': urllib.quote_plus(self.query),
                                      'start': self.page * self._num,
                                      'num': self._num,
                                      'tld': self._tld,
                                      'lang': self._lang,
                                      'filter': self._filter}
        print safe_url
        self.lr.load(safe_url)

        results = []
        i = 0
        for r in self.lr.xpaths('//li[@class="g"]'):
            i += 1
            result = {}
            result['title'] = ''.join(r.xpath('./div/h3//text()'))
            result['description'] = ''.join(r.xpath('./div//span[@class="st"]//text()'))
            result['url'] = ''.join(r.xpath('./div/h3/a/@href'))
            results.append(result)
        print i

        return results

    def get_result(self):
        return self._get_result()
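
# A minimal usage sketch, not from the original source: page through results by
# bumping the `page` property. GoogleHTTPErrorProcessor and any proxy string are
# assumed to be defined/configured elsewhere in this module.
if __name__ == '__main__':
    gs = GoogleSearch('site:example.com', num=10)
    for page in range(2):
        gs.page = page
        for item in gs.get_result():
            print('%s  %s' % (item['title'], item['url']))
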
def get_codes_sina(delay=.0):
    codes = []
    # url = 'http://vip.stock.finance.sina.com.cn/datacenter/hqstat.html#jdgd'
    url = '''http://money.finance.sina.com.cn/quotes_service/api/jsonp_v2.php/IO.XSRV2.CallbackList['ys65jC9HtVOEBgTh']/StatisticsService.getPeriodList?page=1&num=9999&sort=_5high&asc=0&node=adr_hk'''
    lr = LRequest(delay=delay)
    try:
        lr.load(url, isdecode=True)
        # the response is JSONP: strip the callback wrapper, then parse the JSON payload
        for s in json.loads(lr.body.split('](', 1)[-1][:-2]):
            codes.append(s['symbol'])
    except:
        logger.error(traceback.format_exc())

    return codes
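
# Usage sketch under the same assumptions as the function above (LRequest, json,
# logger, traceback imported at module level):
if __name__ == '__main__':
    hk_adr_codes = get_codes_sina(delay=1.0)
    print('got %s HK ADR symbols' % len(hk_adr_codes))
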
def do(queue, string_proxy):
    lr = LRequest(string_proxy=string_proxy)
    while 1:
        try:
            # e.g. https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=sheets+silk
            category = queue.get(timeout=30)
            url = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%%3Daps&field-keywords=%s' % urllib.quote_plus(category)
            lr.load(url)
            if check_captcha(lr):
                lr.load(url)

            total_price = 0.0
            count = 0.0
            price_eles = lr.xpaths('//span[contains(@class, "s-price a-text-bold")]')
            for price_ele in price_eles:
                # price text looks like "$49.99" or "$19.99 - $29.99"; keep the low end
                price = price_ele.text.replace('$', '').replace(',', '').split('-', 1)[0].strip()
                try:
                    float(price)
                except:
                    pass
                else:
                    total_price += float(price)
                    count += 1

            if count > 0:
                ave_price = total_price / count
                # result-count header looks like "1-48 of 206,341 results for ..."
                ele = lr.xpath('//h2[@id="s-result-count"]')
                result_count = ele.text.split('result', 1)[0].split('of')[-1].strip().replace(',', '')

                f.write('%s\t%s\t%.2f\n' % (category, result_count, ave_price))
                f.flush()
                print '%s\t%s\t%.2f' % (category, result_count, ave_price)
        except Empty:
            print 'empty'
            break
        except Exception as e:
            traceback.print_exc()
            queue.put(category)
            print 'EEEEEEEEE %s' % e
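
# A hedged sketch of how the worker above could be driven (not in the original script):
# the category list, the shared output file `f`, the check_captcha() helper, and the
# proxy string are all assumptions; the real script presumably wires these up elsewhere.
import threading
from Queue import Queue

if __name__ == '__main__':
    queue = Queue()
    for cat in ('sheets silk', 'pillow cases', 'duvet covers'):
        queue.put(cat)
    workers = [threading.Thread(target=do, args=(queue, 'http://127.0.0.1:8888'))
               for _ in range(3)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
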
def iter_name(string_proxy, queue):
    lr = LRequest(string_proxy)
    while 1:
        try:
            url, deep = queue.get(timeout=30)
            # one extra "/ul" per nesting level of the best-sellers category tree
            xp = '//ul[@id="zg_browseRoot"]/%s/li/a' % '/'.join(['ul' for i in range(deep)])
            # print xp
            lr.load(url.encode('utf-8'))
            next_deep = deep + 1
            for ele in lr.xpaths(xp):
                name = ele.text.strip()
                if name not in categories:
                    categories.add(name)
                    print name.encode('utf-8')
                    queue.put([ele.attrib['href'], next_deep])
        except Empty:
            print 'Empty'
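
# Hedged seeding sketch (not from the original): the crawl starts from a best-sellers
# root page at depth 1, one extra /ul per level; `categories` is the module-level set
# the worker de-duplicates against (re-initialised here for a standalone run). The seed
# URL and proxy string are illustrative assumptions; the loop polls until interrupted.
if __name__ == '__main__':
    from Queue import Queue
    categories = set()
    queue = Queue()
    queue.put(['https://www.amazon.com/Best-Sellers-Home-Kitchen/zgbs/home-garden/ref=zg_bs_nav_0', 1])
    iter_name('http://127.0.0.1:8888', queue)
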
def get_all_codes():
    stock_code_url = 'http://quote.eastmoney.com/center/gridlist.html'  # 'http://quote.eastmoney.com/stocklist.html'
    # us: http://quote.eastmoney.com/usstocklist.html
    exchanges = ['ss', 'sz', 'hk']

    lr = LRequest()
    stock_codes = []
    lr.load(stock_code_url)

    # stock_eles = lr.xpath('//div[@id="quotesearch"]//li/a[@target="_blank"]')
    stock_exchange_eles = lr.xpaths('//div[@id="quotesearch"]/ul')
    for i, stock_exchange_ele in enumerate(stock_exchange_eles):
        stock_eles = stock_exchange_ele.xpath('./li/a[@target="_blank"]')
        for stock_ele in stock_eles:
            # code = stock_ele.get('href').rsplit('/', 1)[-1].split('.', 1)[0]
            if stock_ele.text:
                # link text looks like "name(code)"; keep the code inside the parentheses
                code = stock_ele.text.split('(', 1)[-1].split(')', 1)[0]
                stock_codes.append((exchanges[i], code))

    return stock_codes
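
# Usage sketch under the same assumptions as the function above (LRequest import, and
# the eastmoney page still exposing the #quotesearch lists):
if __name__ == '__main__':
    for exchange, code in get_all_codes()[:10]:
        print('%s %s' % (exchange, code))
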
def iter_name(string_proxy, queue):
    lr = LRequest(string_proxy)
    while 1:
        try:
            url, deep = queue.get(timeout=30)
            xp = '//ul[@id="zg_browseRoot"]/%s/li/a' % '/'.join(['ul' for i in range(deep)])
            # logger.info(xp)
            lr.load(url)
            next_deep = deep + 1
            for ele in lr.xpaths(xp):
                name = ele.text.strip()
                if name not in categories:
                    categories.add(name)
                    logger.info(name)
                    queue.put([ele.attrib['href'], next_deep])
        # except KeyboardInterrupt:
        #     return
        except Empty:
            logger.info('Empty')
        except Exception as ex:
            traceback.print_exc()
class GsaCaptcha():

    lr = None
    ip = ''
    port = ''

    def __init__(self, ip='127.0.0.1', port='80'):
        self.ip = ip
        self.port = port
        self.lr = LRequest()

    def decode(self, file_path):
        try:
            form = ParseFile(StringIO.StringIO(gsa_form_str % (self.ip, self.port)),
                             base_uri='http://%s:%s' % (self.ip, self.port))[0]
            form.add_file(open(file_path, 'rb'), name='file')
            self.lr.load(form.click(), is_xpath=False)

            gsa_result = self.lr.body
            result = ''
            if gsa_result.find('<span id="captcha_result">') > -1:
                result = gsa_result.split('<span id="captcha_result">')[1].split('</span>')[0]
            return result
        except:
            raise

    def decode_stream(self, file_data):
        try:
            form = ParseFile(StringIO.StringIO(gsa_form_str % (self.ip, self.port)),
                             base_uri='http://%s:%s' % (self.ip, self.port))[0]
            form.add_file(StringIO.StringIO(file_data), name='file')
            self.lr.load(form.click(), is_xpath=False)

            result = ''
            gsa_result = self.lr.body
            if gsa_result.find('<span id="captcha_result">') > -1:
                result = gsa_result.split('<span id="captcha_result">')[1].split('</span>')[0]
            return result
        except:
            raise

    def decode_url(self, url):
        try:
            self.lr.load(url)

            form = ParseFile(StringIO.StringIO(gsa_form_str % (self.ip, self.port)),
                             base_uri='http://%s:%s' % (self.ip, self.port))[0]
            form.add_file(StringIO.StringIO(self.lr.body), name='file')
            self.lr.load(form.click(), is_xpath=False)

            result = ''
            gsa_result = self.lr.body
            if gsa_result.find('<span id="captcha_result">') > -1:
                result = gsa_result.split('<span id="captcha_result">')[1].split('</span>')[0]
            return result
        except:
            raise
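
# A minimal usage sketch, assuming a GSA captcha service is listening on the host/port
# below; the image path and image URL are illustrative placeholders only.
if __name__ == '__main__':
    captcha = GsaCaptcha(ip='127.0.0.1', port='8000')
    print(captcha.decode('captcha.jpg'))
    print(captcha.decode_url('http://example.com/captcha/image.jpg'))
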
class AmazonBase(object):

    CACHE_ROOT = ''
    CACHE_PAGES_ROOT = ''
    CACHE_IMAGES_ROOT = ''
    CACHE_EXPIRED_DAYS = 15
    captcha = None

    def __init__(self, **kwargs):
        self.lr = LRequest(string_proxy=kwargs.get('string_proxy', ''))
        self.captcha = GsaCaptcha(ip=kwargs.get('gsa_ip', '192.168.1.188'),
                                  port=kwargs.get('gsa_port', '8000'))

        self.CACHE_ROOT = config.AMAZON_CACHE_ROOT
        self.CACHE_PAGES_ROOT = kwargs.get('cache_page', os.path.join(self.CACHE_ROOT, 'pages'))
        self.CACHE_IMAGES_ROOT = kwargs.get('cache_image', os.path.join(self.CACHE_ROOT, 'images'))

        if not os.path.exists(self.CACHE_ROOT):
            os.makedirs(self.CACHE_ROOT)
        if not os.path.exists(self.CACHE_PAGES_ROOT):
            os.makedirs(self.CACHE_PAGES_ROOT)
        if not os.path.exists(self.CACHE_IMAGES_ROOT):
            os.makedirs(self.CACHE_IMAGES_ROOT)

        self.domain = kwargs.get('domain', 'amazon.com')
        self.CACHE_EXPIRED_DAYS = kwargs.get('cache_expired_days', 15)

    def load(self, url, is_xpath=True, is_decode=True):
        # logger.info('Load Url: %s' % url)
        url = urllib.parse.quote(url, safe='https:/')
        self.lr.load(url, is_xpath=is_xpath, is_decode=is_decode)
        if self.check_captcha():
            self.lr.load(url, is_xpath=is_xpath, is_decode=is_decode)

    def check_captcha(self):
        # returns True when a captcha page was detected and solved, so the caller re-loads the url
        if self.captcha is not None:
            captcha_img_ele = self.lr.xpath('//form[contains(@action, "Captcha")]//img[contains(@src, "captcha")]')
            if captcha_img_ele is not None:
                while 1:
                    logger.info('Need Captcha')
                    try:
                        if captcha_img_ele is not None:
                            print('##### %s ' % captcha_img_ele.attrib['src'])
                            form = self.lr.get_forms()[0]
                            self.lr.load(captcha_img_ele.attrib['src'])
                            cap = self.captcha.decode_stream(self.lr.body)
                            logger.info('Captcha: %s' % cap)
                            form['field-keywords'] = cap
                            self.lr.load(form.click())
                        else:
                            return True

                        captcha_img_ele = self.lr.xpath('//form[contains(@action, "Captcha")]//img[contains(@src, "captcha")]')
                    except KeyboardInterrupt:
                        raise
                    except IndexError:
                        self.lr.load(self.lr.current_url)
                        captcha_img_ele = self.lr.xpath('//form[contains(@action, "Captcha")]//img[contains(@src, "captcha")]')
                        if captcha_img_ele is None:
                            return True
                    except:
                        # open(os.path.join('I:\\captcha_error_page', '%s.html' % time.time()), 'w').write(self.lr.body)
                        logger.error(traceback.format_exc())
                        return False
        else:
            raise RuntimeError('Not Captcha Server...')

    def exists_cache(self, cache_name):
        cache_path = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0], cache_name[1], cache_name)
        return os.path.exists(cache_path)

    def remove_cache(self, cache_name):
        cache_path = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0], cache_name[1], cache_name)
        if os.path.exists(cache_path):
            try:
                os.remove(cache_path)
            except:
                pass

    def load_cache(self, cache_name):
        cache_path = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0], cache_name[1], cache_name)
        if os.path.exists(cache_path):
            try:
                return pickle.loads(gzip.GzipFile(cache_path, 'rb').read())
            except:
                return {}
        return {}

    def save_cache(self, cache_name, data):
        _p = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0], cache_name[1])
        if not os.path.exists(_p):
            os.makedirs(_p)
        cache_path = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0], cache_name[1], cache_name)

        gzip_file = gzip.open(cache_path, 'wb')
        gzip_file.write(pickle.dumps(data))
        gzip_file.close()

    def exists_image(self, name):
        image_path = os.path.join(self.CACHE_IMAGES_ROOT, name[0], name[1], name)
        return os.path.exists(image_path)

    def save_image(self, name, data):
        _p = os.path.join(self.CACHE_IMAGES_ROOT, name[0], name[1])
        if not os.path.exists(_p):
            os.makedirs(_p)
        image_path = os.path.join(self.CACHE_IMAGES_ROOT, name[0], name[1], name)
        open(image_path, 'wb').write(data)

    @staticmethod
    def wrapped_url(url):
        return url.split('/ref', 1)[0]

    @cache()
    @load_html
    @name
    @price
    @brand
    @merchant
    @sold_by
    @reviews
    @star
    @ranks_str
    @other_seller
    @weight_ounces
    def product_detail(self, asin, is_cache=True, **kwargs):
        return kwargs.get('product_info', {})

    @cache()
    @load_html
    @image_urls
    @image_data
    def product(self, asin, is_cache=True, **kwargs):
        return kwargs.get('product_info', {})
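
# A hedged usage sketch (not from the original module): it assumes config.AMAZON_CACHE_ROOT
# points at a writable directory, a GSA captcha service is reachable at the given ip/port,
# and the decorator chain (@cache, @load_html, @name, @price, ...) populates product_info
# with keys mirroring the decorator names. The ASIN is a placeholder.
if __name__ == '__main__':
    amazon = AmazonBase(string_proxy='', gsa_ip='127.0.0.1', gsa_port='8000', domain='amazon.com')
    info = amazon.product_detail('B000000000', is_cache=True)
    print(info.get('name'), info.get('price'))
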
# -*- coding: utf-8 -*-
__author__ = 'xtwxfxk'

import urllib.parse
from lutils.lrequest import LRequest

# url = 'https://www.amazon.com/Best-Sellers-Home-Kitchen/zgbs/home-garden/ref=zg_bs_nav_0'
url = 'https://www.amazon.com/Best-Sellers-Home-Kitchen-Décor-Products/zgbs/home-garden/1063278'
# url = urllib.parse.quote('https://www.amazon.com/Best-Sellers-Home-Kitchen-Décor-Products/zgbs/home-garden/1063278')
# url = urllib.parse.urlencode('https://www.amazon.com/Best-Sellers-Home-Kitchen-Décor-Products/zgbs/home-garden/1063278')
url = urllib.parse.quote(url, safe='https:/')
print(url)

lr = LRequest()
lr.load(url, is_decode=True)

eles = lr.xpaths('//ul[@id="zg_browseRoot"]/ul/ul/ul/li/a')
for ele in eles:
    print(ele.text.strip(), ele.attrib['href'])

# https://www.amazon.com/Best-Sellers-Home-Kitchen-D%C3%A9cor-Products/zgbs/home-garden/1063278/
class LStockData():
    # Downloads Sina stock history / k-line data and stores it in a PyTables HDF5 file.

    start_url = 'http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml'
    url_format = 'http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml?year=%s&jidu=%s'
    real_time_date_url = 'http://hq2fls.eastmoney.com/EM_Quote2010PictureApplication/Flash.aspx?Type=CR&ID=6035771&r=0.8572017126716673'

    def __init__(self, delay=0.0, cache=None, debuglevel=0):  # , input, output, **kwargs):
        # threading.Thread.__init__(self)
        # self.input = input
        # self.output = output
        self.count = 0
        self.cache = cache
        self.debuglevel = debuglevel
        self.lr = LRequest(delay=delay)

    def _fetch_detail(self):
        details = []
        if self.lr.body.find('class="datatbl"') > -1:
            trs = self.lr.xpaths('//table[@class="datatbl"]//tr')[1:]
            for tr in trs:
                t = tr.xpath('./th[1]')[0].text.strip()
                price = tr.xpath('./td[1]')[0].text.strip()
                _price_change = tr.xpath('./td[2]')[0].text.strip()
                volume = tr.xpath('./td[3]')[0].text.strip()
                _turnover = tr.xpath('./td[4]')[0].text.strip()
                # the trade-type column arrives mis-decoded; round-trip it back through GBK
                _nature = bytes(''.join(tr.xpath('./th[2]')[0].itertext()).strip(), 'ISO-8859-1').decode('gbk')
                if _nature == '卖盘':
                    nature = 'sell'
                elif _nature == '买盘':
                    nature = 'buy'
                elif _nature == '中性盘':
                    nature = 'neutral_plate'
                else:
                    nature = _nature

                price_change = '0.0'
                if _price_change != '--':
                    price_change = _price_change
                turnover = _turnover.replace(',', '')

                details.append({
                    'time': t,
                    'price': price,
                    'price_change': price_change,
                    'volume': volume,
                    'turnover': turnover,
                    'nature': nature,
                })
        return details

    def _check_delay(self):
        if (time.time() - self.t1) > 1800:
            logger.info('Wait 60 Sec..')
            time.sleep(60)
            self.t1 = time.time()

    # @try_request_count(wait_count=50)
    @try_except_response
    def load(self, url):
        return self.lr.load(url)

    def search_to_h5(self, code, save_path, start_year=2007, mode='a', is_detail=True):
        h5file = tables.open_file(save_path, mode=mode)
        k_line_mins = [5, 15, 30, 60]
        end_year = datetime.date.today().year + 1
        self.t1 = time.time()
        try:
            if '/stock' not in h5file:
                stocks_group = h5file.create_group('/', 'stock', 'Stock Information')
            else:
                stocks_group = h5file.get_node('/stock')

            if '/stock/stocks' not in h5file:
                stock_table = h5file.create_table(stocks_group, 'stocks', Stocks, "Stock Table")
            else:
                stock_table = h5file.get_node('/stock/stocks')
            stock = stock_table.row

            if '/stock/details' not in h5file:
                detail_table = h5file.create_table(stocks_group, 'details', StockDetails, "Stock Detail Table")
            else:
                detail_table = h5file.get_node('/stock/details')
            detail = detail_table.row

            ####################################
            #
            # 20200810 server disable
            #
            ####################################
            # if stock_table.nrows > 0:
            #     last_data = stock_table[-1]
            #     last_date = str(last_data[0]).split('_')[-1]
            #     last_date = '%s-%s-%s' % (last_date[0:4], last_date[4:6], last_date[6:8])
            #     start_year = last_date.split('-')[0]
            # else:
            #     last_date = '1990-01-01'
            #     last_year = '1990'

            # url = self.start_url % code
            # # logger.info('Load Url: %s' % url)
            # self.load(url)

            # _start_year = self.lr.xpaths('//select[@name="year"]/option')[-1].attrib['value'].strip()
            # # if _start_year < '2007':
            # #     _start_year = '2007'
            # _start_year = int(_start_year)
            # if start_year < _start_year:
            #     start_year = _start_year

            # t = datetime.datetime.strptime(last_date, '%Y-%m-%d')
            # quarter = pd.Timestamp(t).quarter
            # start_year = int(start_year)
            # for year in range(start_year, end_year):
            #     for quarter in range(quarter, 5):
            #         try:
            #             self._check_delay()
            #             _url = self.url_format % (code, year, quarter)
            #             # logger.info('Load: %s: %s' % (code, _url))
            #             time.sleep(1)  # random.randint(1, 5))
            #             self.load(_url)
            #             if self.lr.body.find('FundHoldSharesTable') > -1:
            #                 records = list(self.lr.xpaths('//table[@id="FundHoldSharesTable"]//tr')[2:])
            #                 records.reverse()
            #                 for record in records:
            #                     _date = record.xpath('./td[1]/div')[0].text.strip()
            #                     # _date = record.xpath('./td[1]/div[1]/text()')[0].strip()
            #                     detail_url = ''
            #                     if not _date:
            #                         _date = record.xpath('./td[1]/div/a')[0].text.strip()
            #                         detail_url = record.xpath('./td[1]/div/a')[0].attrib['href'].strip()
            #                     if _date <= last_date:
            #                         continue
            #                     _opening_price = record.xpath('./td[2]/div')[0].text.strip()
            #                     _highest_price = record.xpath('./td[3]/div')[0].text.strip()
            #                     _closing_price = record.xpath('./td[4]/div')[0].text.strip()
            #                     _floor_price = record.xpath('./td[5]/div')[0].text.strip()
            #                     _trading_volume = record.xpath('./td[6]/div')[0].text.strip()
            #                     _transaction_amount = record.xpath('./td[7]/div')[0].text.strip()
            #                     _id = '%s_%s' % (code, _date)
            #                     _date = _date.replace('-', '')
            #                     if is_detail:
            #                         details = []
            #                         if detail_url:
            #                             params = parse_qs(urlparse(detail_url).query, True)
            #                             detail_last_page = 'http://market.finance.sina.com.cn/transHis.php?date=%s&symbol=%s' % (params['date'][0], params['symbol'][0])
            #                             # time.sleep(1)
            #                             self.load(detail_last_page)
            #                             # logger.info('Load Detail: %s: %s' % (code, detail_down_url))
            #                             details.extend(self._fetch_detail())
            #                             if self.lr.body.find('var detailPages=') > -1:
            #                                 pages = json.loads(self.lr.body.split('var detailPages=', 1)[-1].split(';;')[0].replace("'", '"'))[1:]
            #                                 for page in pages:
            #                                     self._check_delay()
            #                                     # time.sleep(1)  # random.randint(1, 5))
            #                                     detail_page = '%s&page=%s' % (detail_last_page, page[0])
            #                                     self.load(detail_page)
            #                                     details.extend(self._fetch_detail())
            #                         details.reverse()
            #                         for d in details:
            #                             # detail['id'] = _id
            #                             detail['date'] = _date
            #                             detail['time'] = d['time']
            #                             detail['price'] = d['price']  # d['price'].split(u'\u0000', 1)[0] if d['price'] else 0.0
            #                             detail['price_change'] = d['price_change']
            #                             detail['volume'] = d['volume']
            #                             detail['turnover'] = d['turnover']
            #                             detail['nature'] = d['nature']
            #                             detail.append()
            #
            #                     stock['id'] = _id
            #                     stock['date'] = _date
            #                     stock['open'] = _opening_price
            #                     stock['high'] = _highest_price
            #                     stock['close'] = _closing_price
            #                     stock['low'] = _floor_price
            #                     stock['volume'] = _trading_volume
            #                     stock['amount'] = _transaction_amount
            #                     stock.append()
            #                     h5file.flush()
            #         except:
            #             raise
            #         quarter = 1
            # # stock_table.flush()
            ###############################################

            h5file.flush()
        except:
            logger.error(traceback.format_exc())
            open('tmp/last.html', 'w').write(self.lr.body)
            raise
        finally:
            h5file.flush()
            h5file.close()

    def search_to_h5_k_line(self, code, save_path, start_year=2007, mode='a'):
        h5file = tables.open_file(save_path, mode=mode)
        # h5file = h5py.File(save_path, 'r+')
        k_line_mins = [5, 15, 30, 60]
        end_year = datetime.date.today().year + 1
        self.t1 = time.time()
        try:
            if '/stock' not in h5file:
                stocks_group = h5file.create_group('/', 'stock', 'Stock Information')
            else:
                stocks_group = h5file.get_node('/stock')

            ####################################
            #
            # new k line data 5m, 15m, 30m, 60m
            #
            ####################################
            kline_rows = {}
            for kmin in k_line_mins:
                if '/stock/kline%s' % kmin not in h5file:
                    kline_table = h5file.create_table(stocks_group, 'kline%s' % kmin, StockKLines,
                                                      "Stock K line %sm Table" % kmin)
                else:
                    kline_table = h5file.get_node('/stock/kline%s' % kmin)
                kline_rows[kmin] = kline_table
            h5file.flush()
            # http://money.finance.sina.com.cn/quotes_service/api/json_v2.php/CN_MarketData.getKLineData?symbol=sz002095&scale=5&ma=no&datalen=1023
            for kmin in k_line_mins:
                k_line_url = 'http://money.finance.sina.com.cn/quotes_service/api/json_v2.php/CN_MarketData.getKLineData?symbol=%s&scale=%s&ma=no&datalen=1023' % (code, kmin)
                try:
                    # logger.info('K line url: %s' % k_line_url)
                    kline_row = kline_rows[kmin].row
                    self.load(k_line_url)
                    if 'null' != self.lr.body.strip():
                        kline_datas = json.loads(self.lr.body)

                        last_data = None
                        if kline_rows[kmin].nrows > 0:
                            last_data = kline_rows[kmin][-1]

                        # [{"day":"2020-08-07 15:00:00","open":"20.390","high":"20.390","low":"20.300","close":"20.300","volume":"54500"}, ...]
                        for kline_data in kline_datas:
                            day = int(datetime.datetime.strptime(kline_data['day'], '%Y-%m-%d %H:%M:%S').timestamp())
                            if last_data is None or last_data[0] < day:
                                kline_row['date'] = day
                                kline_row['open'] = kline_data['open']
                                kline_row['high'] = kline_data['high']
                                kline_row['close'] = kline_data['close']
                                kline_row['low'] = kline_data['low']
                                kline_row['volume'] = kline_data['volume']
                                kline_row.append()
                except:
                    logger.error('Error Url: %s' % k_line_url)
                    logger.error(traceback.format_exc())
                    open('tmp/last.html', 'w').write(self.lr.body)

            ############## end #################

            h5file.flush()
        except:
            logger.error(traceback.format_exc())
            open('tmp/last.html', 'w').write(self.lr.body)
            raise
        finally:
            h5file.flush()
            h5file.close()
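
# Usage sketch (assumptions: the PyTables row schemas Stocks/StockDetails/StockKLines and a
# tmp/ directory for error dumps exist, and the Sina JSON endpoint is reachable). Per the
# 20200810 note, search_to_h5() now only prepares the tables; the k-line fetcher does the
# downloading. The symbol format follows the API used above, e.g. 'sz002095'.
if __name__ == '__main__':
    data = LStockData(delay=1.0)
    data.search_to_h5('sz002095', 'sz002095.h5', mode='a')         # creates the /stock tables
    data.search_to_h5_k_line('sz002095', 'sz002095.h5', mode='a')  # appends 5/15/30/60m bars
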