Exemplo n.º 1
0
 def perform(self):
     """Fetch ``self.url`` and store the parsed result on the instance.

     On an HTTP return code of 200 the response content is passed to
     ``self.__dump`` and the two values it yields are stored as
     ``self.current_focus`` and ``self.monthly_focus``.

     Returns:
         True when the return code was 200 and the attributes were
         updated, False otherwise (the attributes are left untouched).
     """
     request = Curl(str(self.url))
     # NOTE(review): the meaning of the False flag is defined by Curl.perform,
     # which is not visible here -- confirm against that class.
     request.perform(False)

     if request.getHttpReturnCode() != 200:
         return False

     parsed = self.__dump(request.getHttpContent())
     self.current_focus, self.monthly_focus = parsed
     return True
Exemplo n.º 2
0
def get_stock_info(id):
    """Fetch the SSE company page for a stock code and scrape selected fields.

    Args:
        id: Stock/company code. It is converted with ``str`` and substituted
            into both the ``PRODUCTID`` and ``COMPANY_CODE`` query parameters.

    Returns:
        A dict holding whichever of the keys ``date``, ``name``, ``en_name``,
        ``address``, ``web``, ``state`` and ``category`` had a matching label
        on the page (possibly empty), or ``None`` when the HTTP return code
        is not 200.
    """
    id = str(id)
    res = {}
    url = "http://www.sse.com.cn/sseportal/webapp/datapresent/SSEQueryListCmpAct?"\
          "reportName=QueryListCmpRpt&REPORTTYPE=GSZC&PRODUCTID=%s&COMPANY_CODE=%s" % (id, id)
    logger.info(url)
    # Format the message once and reuse it for both the log and stdout.
    message = "fetch [%s] info %s" % (id, url)
    logger.info(message)
    # Parenthesized single-argument form prints identically on Python 2
    # and is also valid Python 3 syntax.
    print(message)
    a = Curl(url)
    a.perform(False)
    logger.info(a.getHttpHeader())
    if a.getHttpReturnCode() != 200:
        return None

    # Each match is a (label cell, value cell) pair of raw HTML fragments.
    re_obj = re.compile(r"<td class=\"content_b\".+?>(?P<key>.+?)</td>.+?<td.+?>(?P<value>.+?)</td>", re.S)
    for key, value in re_obj.findall(a.getHttpContent()):
        # Independent ``if`` tests (not ``elif``): a label is checked against
        # every known field, exactly as before.
        if u"上市日" in key:
            res['date'] = filter_html(value)
        if u"公司全称" in key:
            # The cell is split on CRLF; the first line is used as the name
            # and the second, when present, as the English name.
            name = re.split(r"\r\n", filter_html(value))
            res['name'] = name[0]
            # Guard against a cell with a single line, which previously
            # raised IndexError and aborted the whole scrape.
            res['en_name'] = name[1] if len(name) > 1 else ''
        if u"注册地址" in key:
            res['address'] = filter_html(value)
        if u"网址" in key:
            res['web'] = filter_html(value)
        if u"所属省/直辖市" in key:
            res['state'] = filter_html(value)
        if u"CSRC行业" in key:
            # Keep only the first whitespace-delimited token of the cell.
            res['category'] = re.split(r"\s+", filter_html(value))[0]
    return res
Exemplo n.º 3
0
def get_stock_list(offset):
    """Fetch one page of the SSE stock listing and scrape its table rows.

    Args:
        offset: Integer substituted into the ``CURSOR`` query parameter
            (formatted with ``%d``).

    Returns:
        A list of two-element lists, one per matching table row: the link
        text of the first matched cell and the content of the following
        matched cell (presumably stock code and stock name -- confirm
        against the page). Returns ``None`` when the HTTP return code is
        not 200 or the listing table is not found in the response.
    """
    url = "http://www.sse.com.cn/sseportal/webapp/datapresent/SSEQueryStockInfoAct"\
          "?reportName=BizCompStockInfoRpt&PRODUCTID=&PRODUCTJP=&PRODUCTNAME=&keyword=&tab_flg=&CURSOR=%d" % offset
    logger.info(url)
    # Format the message once and reuse it for both the log and stdout.
    message = "fetch URL %s" % url
    logger.info(message)
    # Parenthesized single-argument form prints identically on Python 2
    # and is also valid Python 3 syntax.
    print(message)
    a = Curl(url)
    a.perform(False)
    logger.info(a.getHttpHeader())
    if a.getHttpReturnCode() != 200:
        return None

    re_obj = re.compile(r"<table.+?bgcolor=\"#337fb2\">(.+?)</table>", re.S)
    match = re_obj.search(a.getHttpContent())
    if not match:
        return None

    content = match.group(1)
    tr_obj = re.compile(r"<tr>.+?</tr>", re.S)
    # Compiled once here instead of once per row.
    stock_obj = re.compile(r"<td.+?><a href=.+?>(.+?)</a></td>.+?<td.+?>(.+?)</td>", re.S)

    res = []
    for item in tr_obj.findall(content):
        stock_match = stock_obj.search(item)
        # Rows that do not match (e.g. header rows) are skipped explicitly
        # rather than by swallowing every exception.
        if stock_match is None:
            continue
        res.append([stock_match.group(1), stock_match.group(2)])
    return res