def industry(names=False, rigs=True, categories=None, detail=-1):
    """Validate the requested categories and return the fetched items as JSON.

    Args:
        names: Forwarded unchanged to ``Industry``.
        rigs: Forwarded unchanged to ``Industry``.
        categories: Iterable of category ids. Every id must be one of
            0, 6, 7, 8, 18 or 22. ``None`` (the default) means ``[0]``.
        detail: Forwarded unchanged to ``Industry``.

    Returns:
        The result of ``jsonify(items=...)`` where the items come from
        ``Industry(...).fetch()``.

    Raises:
        LookupError: If any requested category id is not in the allowed set.
    """
    allowed_categories = (0, 6, 7, 8, 18, 22)

    # Build the default per call: a list literal in the signature would be a
    # single object shared by every call and by whatever Industry does with it.
    if categories is None:
        categories = [0]

    for cat in categories:
        if cat not in allowed_categories:
            raise LookupError('unsupported category: %r' % (cat,))

    i = Industry(names=names, rigs=rigs, categories=categories, detail=detail)
    items = i.fetch()
    return jsonify(items=items)
def parseIndustry():
    """Fetch the SSE listing page and print each parsable table row.

    Every ``<tr valign="top">`` on the page is treated as one row. The first
    text node of each ``<td>`` is taken as the cell value; cell 0 becomes the
    record's ``name``, cell 4 its ``PE`` (as a float) and cell 5 its
    ``avgPrice``. For each row that parses, the record is printed, and the
    page behind the row's first link is fetched and its serialised markup
    printed.

    Rows with fewer than six cells, or whose cell 4 is not a number, are
    skipped. Rows without a link are printed but no detail page is fetched.

    Returns:
        None. All output goes to stdout.
    """
    from lxml import etree
    from lxml.html import parse

    from industry import Industry

    base_url = 'http://www.sse.com.cn'
    url = 'http://www.sse.com.cn/sseportal/webapp/datapresent/SSEQueryFirstSSENewAct'
    # Cells 0, 4 and 5 are read below, so a usable row needs at least six.
    required_cells = 6

    page = parse(url).getroot()

    # NOTE(review): the original formatting was lost; the detail-page fetch is
    # assumed to run once per row (inside this loop) - confirm.
    for data in page.xpath('//tr[@valign="top"]'):
        # Wrapping the element in its own ElementTree is kept from the
        # original so the absolute XPath expressions below are evaluated
        # against this row's tree.
        row_tree = etree.ElementTree(data)
        cells = row_tree.xpath('//td')
        if len(cells) < required_cells:
            continue

        rows = []
        for cell in cells:
            texts = etree.ElementTree(cell).xpath('//text()')
            # A cell with no text yields '' instead of raising IndexError.
            rows.append(texts[0].strip() if texts else '')

        record = Industry()
        name = str(rows[0])
        href = row_tree.xpath('//a/@href')
        print(href)

        try:
            pe = float(rows[4])
            avg_price = str(rows[5])
        except ValueError:
            # Non-numeric PE (e.g. a header or blank row): not a data row.
            continue

        record.name = name
        record.PE = pe
        record.avgPrice = avg_price
        print(record)

        if not href:
            # No link in this row, so there is no detail page to fetch.
            continue

        new_url = base_url + str(href[0])
        print(new_url)
        new_page = parse(new_url).getroot()
        print(etree.tostring(new_page))