def _get_sina_Market_url(market='sh_a', count=None, num='1000'):
    """Build the list of paged Sina market-center URLs for one market.

    Parameters
    ----------
    market : str
        Market identifier substituted into the ``ct`` URL templates.
    count : str or int, optional
        Total number of rows. When ``None`` the total is fetched from
        ``ct.JSON_Market_Center_CountURL`` and taken from the first run of
        digits in the response.
    num : str or int
        Page size (rows per request).

    Returns
    -------
    list
        One URL per page. Empty when the count had to be fetched and the
        response contained no digits.
    """
    if count is None:
        data = cct.get_url_data(ct.JSON_Market_Center_CountURL % (market))
        found = re.findall(r'(\d+)', data, re.S)
        if not found:
            return []
        count = found[0]

    total = int(count)
    page_size = int(num)

    if total < page_size:
        # Everything fits on one page: request exactly `count` rows.
        return [ct.JSON_Market_Center_RealURL % ('1', count, market)]

    # Integer ceiling division. The previous math.ceil(int / int) truncated
    # under Python 2 and silently dropped the final partial page.
    page_count = -(-total // page_size)
    return [ct.JSON_Market_Center_RealURL % (page, num, market)
            for page in range(1, page_count + 1)]
def _get_sina_json_dd_url(vol='0', type='3', num='10000', count=None):
    """Build the list of paged Sina "DD" data URLs.

    Parameters
    ----------
    vol : str
        Key into ``ct.DD_VOL_List``; the looked-up value is substituted into
        the URL templates.
    type : str
        Substituted verbatim into the URL templates (name kept for caller
        compatibility although it shadows the builtin).
    num : str or int
        Page size used when ``count`` is ``None``.
    count : str or int, optional
        Row count already known to the caller. When given, only the pages
        that can contain rows beyond ``count`` are returned; this incremental
        mode always uses a page size of 10000, as the original code did.

    Returns
    -------
    list
        URLs to fetch. Empty when the count response holds no digits, or when
        the remote count is not larger than ``count``.
    """
    vol_key = ct.DD_VOL_List[vol]
    count_url = ct.JSON_DD_CountURL % (vol_key, type)

    if count is None:
        log.info("_json_dd_url:%s" % count_url)
        data = cct.get_url_data(count_url)
        found = re.findall(r'(\d+)', data, re.S)
        log.debug("_json_dd_url_count:%s" % found)
        if not found:
            return []
        total = found[0]
        # Trailing comma kept from the original: under Python 2 it suppresses
        # the newline so the progress output stays on one line.
        print("Big:%s" % (total)),
        page_size = int(num)
        if int(total) < page_size:
            return [ct.JSON_DD_Data_URL_Page % (total, '1', vol_key, type)]
        # Integer ceiling division; math.ceil(int / int) truncated under
        # Python 2 and lost the last partial page.
        page_count = -(-int(total) // page_size)
        return [ct.JSON_DD_Data_URL_Page % (page_size, page, vol_key, type)
                for page in range(1, page_count + 1)]

    # Incremental mode: compare the caller's count with the current one.
    data = cct.get_url_data(count_url)
    found = re.findall(r'(\d+)', data, re.S)
    if not found:
        return []
    # The original compared `count` with the findall *list* and called int()
    # on that list, which cannot work; use the first match as a number.
    previous = int(count)
    current = int(found[0])
    if current <= previous:
        return []

    page_size = 10000
    # Start at the page holding the last row already seen (never below 1,
    # since the pages built above are numbered from 1) and go up to the page
    # holding the newest row.
    first_page = max(1, -(-previous // page_size))
    last_page = -(-current // page_size)
    return [ct.JSON_DD_Data_URL_Page % ('10000', page, vol_key, type)
            for page in range(first_page, last_page + 1)]
def get_sina_tick_js_LastPrice(symbols):
    """Fetch the previous-close price for one or several stock codes.

    Parameters
    ----------
    symbols : str, list, tuple, set or pandas.Series
        A single code or a collection of codes. Each code is converted with
        ``cct.code_to_symbol`` before being placed in the request URL.

    Returns
    -------
    dict or str
        Mapping ``code -> float`` built from the third comma-separated field
        of each quoted payload in the response (labelled "previous close" in
        the original comments). Keys are the codes exactly as passed in.
        An empty input returns ``''``, kept for backward compatibility.
    """
    if len(symbols) == 0:
        return ''

    # Materialise the codes once so that sets and Series can be paired with
    # the response by position, and a one-element list yields its element
    # (not the unhashable list itself) as the dictionary key.
    if isinstance(symbols, (list, set, tuple, pd.Series)):
        codes = list(symbols)
    else:
        codes = [symbols]

    query = ','.join(cct.code_to_symbol(code) for code in codes)
    data = cct.get_url_data("http://hq.sinajs.cn/list=%s" % (query))

    # Statements are ';'-terminated, so the piece after the last ';' is not a
    # quote and is dropped (the original loop stopped one short as well).
    statements = data.split(";")[:-1]

    price_dict = {}
    for code, statement in zip(codes, statements):
        quoted = re.search('''(")(.+)(")''', statement)
        if quoted is None:
            # Empty payload between the quotes: nothing to parse for this code.
            continue
        fields = quoted.group(2).split(",")
        if len(fields) < 3:
            continue
        price_dict[code] = float(fields[2])
    return price_dict
def get_tzrq(url, today):
    """Fetch ``url % today`` and extract three scaled figures from it.

    The response is scanned for double-quoted chunks. Only when exactly three
    chunks are found is the result filled: the sixth comma-separated field of
    each chunk is divided by 100000000 and rounded to one decimal, and stored
    under ``'sh'``, ``'sz'`` and ``'all'`` in that order. An empty field
    yields ``0``.

    Returns
    -------
    dict
        ``{'sh': ..., 'sz': ..., 'all': ...}``, or an empty dict when the
        response does not contain exactly three quoted chunks.
    """
    payload = cct.get_url_data(url % today)
    chunks = re.findall('\"([\d\D]+?)\"', payload)
    result = {}
    if len(chunks) != 3:
        return result

    for label, chunk in zip(('sh', 'sz', 'all'), chunks):
        raw = chunk.split(',')[5]
        if len(raw) > 0:
            result[label] = round(float(raw) / 100000000, 1)
        else:
            result[label] = 0
    return result
def _parsing_sina_dd_price_json(url):
    """Download one page of intraday quote data and parse it into a DataFrame.

    The response is JSON-like text with unquoted keys. Keys are quoted with a
    regular expression and the leading ``symbol`` key of every record is
    renamed to ``code`` before the text is handed to ``pandas.read_json``.

    Parameters
    ----------
    url : str
        Fully formatted page URL, fetched with ``cct.get_url_data``.

    Returns
    -------
    pandas.DataFrame or str
        Rows with ``volume > 0``, with the columns of
        ``ct.DAY_REAL_DD_COLUMNS`` minus ``symbol``. ``''`` is returned when
        the response is shorter than 10 characters.
    """
    ct._write_console()
    text = cct.get_url_data(url)
    if len(text) < 10:
        return ''

    # Quote every bare key that sits between a ',' and a ':'.
    reg = re.compile(r'\,(.*?)\:')
    text = reg.sub(r',"\1":', text.decode('gbk') if ct.PY3 else text)
    # Records after the first come out of the substitution as `"{symbol":`;
    # the first record still starts with a bare `{symbol:`.
    text = text.replace('"{symbol', '{"code')
    text = text.replace('{symbol', '{"code"')

    # Both PY3 and non-PY3 branches of the original performed this same
    # round trip, so the branch is collapsed.
    # NOTE(review): presumably kept to normalise the text type on Python 2 - confirm.
    js = json.loads(json.dumps(text))

    df = pd.DataFrame(pd.read_json(js, dtype={'code': object}),
                      columns=ct.DAY_REAL_DD_COLUMNS)
    df = df.drop('symbol', axis=1)
    # `.ix` was removed in pandas 1.0; `.loc` with a boolean mask selects the
    # same rows on old and new pandas alike.
    df = df.loc[df.volume > 0]
    return df
def _wencai_decode(text):
    """Decode backslash escapes in *text* when it is a byte string.

    Under Python 2 ``str`` is ``bytes``, so the ``unicode-escape`` codec is
    applied exactly as the original inline ``.decode`` calls did. Under
    Python 3 ``str`` has no ``decode`` method and is returned unchanged.
    """
    if isinstance(text, bytes):
        return text.decode('unicode-escape')
    return text


def get_wencai_Market_url(filter='国企改革', perpage=1, url=None):
    """Query iwencai stock-pick search for *filter* and return the hits.

    Parameters
    ----------
    filter : str
        Search keyword substituted into the search URL (name kept for caller
        compatibility although it shadows the builtin).
    perpage : int
        Number of rows requested from the cache endpoint.
    url : optional
        When not ``None`` no request is made and an empty DataFrame is
        returned; the same happens once the module-level ``wencai_count``
        has reached 1 (it is incremented when access is denied).

    Returns
    -------
    pandas.DataFrame
        Columns ``code, name, trade, percent, category, tilte1, tilte2``
        (column spellings are part of the existing interface), sorted by
        ``percent`` descending. Empty when the request is skipped, access is
        denied, no single token is found, or no result table is found.
    """
    # NOTE(review): `null` is presumably a module-level alias that lets JSON
    # nulls resolve inside eval() below - confirm.
    global null, wencai_count
    df = pd.DataFrame()
    if url is not None or wencai_count >= 1:
        return df

    time_s = time.time()
    wencairoot = 'http://www.iwencai.com/stockpick/search?typed=0&preParams=&ts=1&f=1&qs=result_original&selfsectsn=&querytype=&searchfilter=&tid=stockpick&w=%s'
    url = wencairoot % (filter)
    log.debug("url:%s" % (url))
    cache_root = "http://www.iwencai.com/stockpick/cache?token=%s&p=1&perpage=%s&showType="
    cache_ends = "[%22%22,%22%22,%22onTable%22,%22onTable%22,%22onTable%22,%22onTable%22,%22onTable%22,%22onTable%22,%22onTable%22,%22onTable%22]"
    data = cct.get_url_data(url, retry_count=1)

    # The site answers with this message ("the system has determined that you
    # have made too many requests") when it throttles the client.
    deny_marker = '系统判断您访问次数过多'
    if isinstance(deny_marker, bytes):
        # Python 2 byte-string literal; Python 3 literals are already text.
        deny_marker = deny_marker.decode('utf8')
    if re.search(deny_marker, data):
        wencai_count += 1
        log.error("acces deny:%s" % ('系统判断您访问次数过多'))
        return df

    tokens = re.findall(r'token":"([\D\d]+?)"', data, re.S)
    log.info("net time:%s" % (time.time() - time_s))
    if len(tokens) != 1:
        return df

    cacheurl = cache_root % (tokens[0], perpage)
    cacheurl = cacheurl + cache_ends
    log.info(cacheurl)
    time_s = time.time()
    html = cct.get_url_data(cacheurl)
    tables = re.findall(r'(\[\["[0-9]{6}\.S[HZ][\D\d]+\]\]),"oriIndexID', html, re.S)
    log.info("count: len:%s" % (len(tables)))
    log.info(time.time() - time_s)
    if not tables:
        return df

    # SECURITY(review): eval() runs text received from a remote server.
    # Kept because the decoding below depends on how eval() leaves escapes in
    # place; a safe parser would need that post-processing revisited.
    result = eval(tables[0])

    seen_url_tails = set()
    dlist = []
    # NOTE(review): key_t is only reset when a row contains a list cell, so a
    # row without one inherits the previous row's titles - as in the original.
    key_t = []
    for xcode in result:
        code_t = []
        for x in xcode:
            if isinstance(x, list):
                key_t = []
                for y in x:
                    if not isinstance(y, dict):
                        continue
                    for key in y.keys():
                        if key == 'URL':
                            urls = _wencai_decode(
                                str(y[key]).replace('\\', '').strip())
                            # De-duplicate on the last 20 characters. The
                            # original indexed `urls[-20]`, a single character,
                            # which collides almost at once and raises
                            # IndexError for URLs shorter than 20 characters.
                            tail = urls[-20:]
                            if tail in seen_url_tails:
                                break
                            seen_url_tails.add(tail)
                            log.info(urls)
                        elif key == 'PageRawTitle':
                            title = _wencai_decode(str(y[key]))
                            key_t.append(title)
                            log.info(title)
            else:
                cell = _wencai_decode(str(x))
                code_t.append(cell)
                log.debug(cell)

        if len(code_t) > 4:
            trade = '0' if code_t[2] == '--' else code_t[2]
            percent = '0' if code_t[3] == '--' else code_t[3]
            # Keep at most three ';'-separated categories, capped at 20 chars.
            category = ";".join(code_t[4].split(';')[:3])[:20]
            row = {'code': code_t[0], 'name': code_t[1], 'trade': trade,
                   'percent': percent, 'category': category}
            if len(key_t) > 0:
                row['tilte1'] = key_t[0]
                row['tilte2'] = key_t[1] if len(key_t) > 1 else None
            dlist.append(row)

    df = pd.DataFrame(dlist, columns=['code', 'name', 'trade', 'percent',
                                      'category', 'tilte1', 'tilte2'])
    # A list, not map(): under Python 3 map() returns a lazy iterator.
    df['code'] = [code[:6] for code in df['code']]
    if len(df) > 0:
        df.percent = df.percent.astype(float)
        df = df.sort_values(by='percent', ascending=False)
    return df