Пример #1
0
def getCrawlNoRssRequestLength():
    """Fetch the pending no-RSS crawl request length from the remote service.

    Returns:
        The ``data`` field of the decoded JSON response; ``None`` when the
        service reports the literal string ``"null"`` or when the request
        fails for any reason.
    """
    try:
        http = HttpRequest()
        # NOTE(review): "requst" looks like a typo of "request" -- the global
        # is defined elsewhere in this module, so the spelling must match it.
        url = requst_norss_length_url
        response = http.setUrl(url).setBody({}).encrypt([]).post()
        res = json.loads(response)["data"]
        if res == "null":
            # The service encodes "no data" as the string "null".
            res = None
        # BUG FIX: the computed result was never returned before, so the
        # function always fell through and returned None on success.
        return res
    except Exception as e:
        logging.info("-----%s-----" % e)
        return None
Пример #2
0
def getCrawlNoRssRequestLength():
    """Fetch the pending no-RSS crawl request length from the remote service.

    Returns:
        The ``data`` field of the decoded JSON response; ``None`` when the
        service reports the literal string ``'null'`` or when the request
        fails for any reason.
    """
    try:
        http = HttpRequest()
        url = requst_norss_length_url
        response = http.setUrl(url).setBody({}).encrypt([]).post()
        res = json.loads(response)['data']
        if res == 'null':
            # The service encodes "no data" as the string 'null'.
            res = None
        # BUG FIX: previously the result was computed but never returned.
        return res
    except Exception as e:
        logging.info("-----%s-----" % e)
        return None
Пример #3
0
def syncLastMd5(params):
    """Synchronize the last MD5 marker with the remote service.

    Args:
        params: Request body sent to ``sync_last_md5_url`` (dict-like).

    Returns:
        The ``data`` field of the decoded JSON response; ``None`` when the
        service reports the literal string ``'null'`` or on any failure.
    """
    try:
        http = HttpRequest()
        url = sync_last_md5_url
        response = http.setUrl(url).setBody(params).encrypt([]).post()
        res = json.loads(response)['data']
        if res == 'null':
            # The service encodes "no data" as the string 'null'.
            res = None
        # BUG FIX: previously the result was computed but never returned.
        return res
    except Exception as e:
        print(e)
        logging.info("-----%s-----" % e)
        return None
Пример #4
0
def getCrawlRssRequest(params=None):
    """Fetch a pending RSS crawl request from the remote service.

    Args:
        params: Optional request body sent to ``request_rss_url``.
            Defaults to an empty dict.

    Returns:
        The ``data`` field of the decoded JSON response; ``None`` when the
        service reports the literal string ``'null'`` or on any failure.
    """
    # BUG FIX: a mutable default argument ({}) is shared across calls; use a
    # None sentinel instead (backward compatible for all callers).
    if params is None:
        params = {}
    try:
        http = HttpRequest()
        url = request_rss_url
        response = http.setUrl(url).setBody(params).encrypt([]).post()
        res = json.loads(response)['data']
        if res == 'null':
            # The service encodes "no data" as the string 'null'.
            res = None
        # BUG FIX: previously the result was computed but never returned.
        return res
    except Exception as e:
        print(e)
        logging.info("-----%s-----" % e)
        return None
Пример #5
0
def getCrawlRssRequest(params=None):
    """Fetch a pending RSS crawl request from the remote service.

    Args:
        params: Optional request body sent to ``request_rss_url``.
            Defaults to an empty dict.

    Returns:
        The ``data`` field of the decoded JSON response; ``None`` when the
        service reports the literal string ``"null"`` or on any failure.
    """
    # BUG FIX: a mutable default argument ({}) is shared across calls; use a
    # None sentinel instead (backward compatible for all callers).
    if params is None:
        params = {}
    try:
        http = HttpRequest()
        url = request_rss_url
        response = http.setUrl(url).setBody(params).encrypt([]).post()
        res = json.loads(response)["data"]
        if res == "null":
            # The service encodes "no data" as the string "null".
            res = None
        # BUG FIX: previously the result was computed but never returned.
        return res
    except Exception as e:
        print(e)
        logging.info("-----%s-----" % e)
        return None
Пример #6
0
def syncLastMd5(params):
    """Synchronize the last MD5 marker with the remote service.

    Args:
        params: Request body sent to ``sync_last_md5_url`` (dict-like).

    Returns:
        The ``data`` field of the decoded JSON response; ``None`` when the
        service reports the literal string ``"null"`` or on any failure.
    """
    try:
        http = HttpRequest()
        url = sync_last_md5_url
        response = http.setUrl(url).setBody(params).encrypt([]).post()
        res = json.loads(response)["data"]
        if res == "null":
            # The service encodes "no data" as the string "null".
            res = None
        # BUG FIX: previously the result was computed but never returned.
        return res
    except Exception as e:
        print(e)
        logging.info("-----%s-----" % e)
        return None
Пример #7
0
def requstDistinct(hashCode):
    """Ask the dedup service which of the given hash codes it already knows.

    Args:
        hashCode: Iterable of hash-code strings; joined with commas and sent
            as the ``field`` of the request body.

    Returns:
        The ``data`` field of the decoded JSON response, or ``[]`` when it is
        empty/falsy or the request fails.
    """
    try:
        joined = ",".join(hashCode)
        request = HttpRequest().setUrl(requst_distinct_url).setBody({'field': joined}).encrypt([])
        payload = json.loads(request.post())
        result = payload['data']
        return result if result else []
    except Exception as err:
        logging.info('-----------%s-------' % err)
        return []
Пример #8
0
def requstDistinct(hashCode):
    """Ask the dedup service which of the given hash codes it already knows.

    Args:
        hashCode: Iterable of hash-code strings; joined with commas and sent
            as the ``field`` of the request body.

    Returns:
        The ``data`` field of the decoded JSON response, or ``[]`` when it is
        empty/falsy or the request fails.
    """
    try:
        joined = ",".join(hashCode)
        request = HttpRequest().setUrl(requst_distinct_url).setBody({"field": joined}).encrypt([])
        payload = json.loads(request.post())
        result = payload["data"]
        return result if result else []
    except Exception as err:
        logging.info("-----------%s-------" % err)
        return []
Пример #9
0
 def retrieve_data(key, code):
     # Fetch the daily price series for one stock and store it in self.data.
     #
     # NOTE(review): the body reads ``self.data`` but ``self`` is not a
     # parameter -- presumably the first argument was meant to be ``self``;
     # confirm against the enclosing class before changing the signature.
     try:
         logging.info("get daily data of %s" % (key+code))
         # Remove commas that directly precede digits in the raw payload
         # (presumably thousands separators -- confirm against the feed).
         s = re.compile(r",(\d+)").sub(r"\1", HttpRequest(DAILY_PRICE_URL % (key+code)).get())
         # Collapse digit groups joined by underscores into one number
         # (e.g. "2020_01_02" -> "20200102").
         s = re.compile(r"(\d+)\_(\d+)\_(\d+)").sub(r"\1\2\3", s)
         self.data[code] = json.loads(s)["record"]
     except:
         # Best-effort: any failure (network, JSON parse, missing key) is
         # swallowed and self.data is left untouched for this code.
         pass
Пример #10
0
def syncCrawlInfos(dataList):
    """Push a batch of crawl-info SQL statements to the remote sync service.

    Args:
        dataList: JSON-serializable batch (list of SQL statements/records);
            serialized and sent together with an MD5 checksum so the server
            can verify payload integrity.

    Returns:
        The ``data`` field of the decoded JSON response, or ``[]`` when it is
        empty/falsy or the request fails.
    """
    try:
        http = HttpRequest()
        # Large batches over a gzip channel: allow up to 15 minutes.
        http.setTimeout(900)
        url = sync_crawl_infos_url
        sqlList = json.dumps(dataList)
        body = {"sql": sqlList, "checksum": toMd5(sqlList)}
        encryptFields = []
        headerDict = {"Content-Encoding": "gzip", "Accept-Encoding": "gzip"}
        response = http.setUrl(url).setBody(body).setHeader(headerDict).encrypt(encryptFields).post()
        res = json.loads(response)["data"]
        if not res:
            return []
        return res
    except Exception as e:
        res = []
        # BUG FIX: a stray ``True`` was passed as a positional arg after the
        # already-%-formatted message, making logging attempt a second
        # formatting pass and emit "not all arguments converted" errors.
        logging.info("-----------%s-------" % e)
        return res
Пример #11
0
def syncCrawlInfos(dataList):
    """Push a batch of crawl-info SQL statements to the remote sync service.

    Args:
        dataList: JSON-serializable batch (list of SQL statements/records);
            serialized and sent together with an MD5 checksum so the server
            can verify payload integrity.

    Returns:
        The ``data`` field of the decoded JSON response, or ``[]`` when it is
        empty/falsy or the request fails.
    """
    try:
        http = HttpRequest()
        # Large batches over a gzip channel: allow up to 15 minutes.
        http.setTimeout(900)
        url = sync_crawl_infos_url
        sqlList = json.dumps(dataList)
        body = {'sql': sqlList, 'checksum': toMd5(sqlList)}
        encryptFields = []
        headerDict = {'Content-Encoding': 'gzip', 'Accept-Encoding': 'gzip'}
        response = http.setUrl(url).setBody(body).setHeader(
            headerDict).encrypt(encryptFields).post()
        res = json.loads(response)['data']
        if not res:
            return []
        return res
    except Exception as e:
        res = []
        # BUG FIX: a stray ``True`` was passed as a positional arg after the
        # already-%-formatted message, making logging attempt a second
        # formatting pass and emit "not all arguments converted" errors.
        logging.info('-----------%s-------' % e)
        return res
Пример #12
0
 def __init__(self):
     # Build the exchange -> stock-code index by scraping the Eastmoney
     # stock-list page.
     #
     # NOTE(review): ``self.code`` is cleared before use, so it must already
     # exist (likely a class attribute); ``.add`` on a possibly-missing key
     # suggests a defaultdict(set) -- confirm where it is declared.
     self.code.clear()
     bs = BeautifulSoup(HttpRequest("http://quote.eastmoney.com/stocklist.html").get(), "lxml")
     for a in bs.find('div', id='quotesearch').find_all('a'):
         try:
             # Hrefs look like ".../sh600000.html": group 1 is the exchange
             # prefix ("sh"/"sz"), group 2 the six-digit stock code.
             rg = re.match(".*(sh|sz)([0-9]{6})\.html", a['href']).groups()
             self.code[rg[0]].add(rg[1])
         except:
             # Anchors without a matching href (or without 'href' at all)
             # raise here and are skipped silently.
             pass
     # Log how many codes were collected per exchange.
     for key in self.code:
         logging.info("%s: %d" % (key, len(self.code[key])))