def get_data(self, i, code, url, st, et):
    """Fetch one request's records and merge them into the per-stock CSV.

    The JSON response's ``'list'`` entry is dumped to a temporary JSON file,
    parsed back with ``pd.read_json`` and de-duplicated on
    ``['ts_code', 'trade_date']`` before being written to
    ``<stock dir><code>_<name>_<industry>.csv``.

    Args:
        i: Index into ``self.name_code`` / ``self.industry`` for this stock.
        code: Stock code, used in the file names.
        url: Fully formatted request URL.
        st: Window boundary string, used only in the temp file name.
        et: Window boundary string, used only in the temp file name.

    How the CSV is written depends on ``self.create``:
        * CSV missing: written with a header, only if there are rows.
        * CSV exists and ``self.create == 0``: existing rows are re-read,
          combined with the new rows, de-duplicated and the file rewritten.
        * CSV exists otherwise: new rows are appended without a header.
    """
    res = req.get(url)
    # Parse the response once; the original parsed it twice.
    records = res.json()['list']
    name = self.name_code[i]
    industry = self.industry[i]
    print(code, name, industry, len(records))

    stock_dir = conf.get('dir', 'stock')
    jsonfile = stock_dir + '{}_{}_{}_{}_{}.json'.format(code, name, industry, st, et)
    filename = stock_dir + '{}_{}_{}.csv'.format(code, name, industry)

    with open(jsonfile, "w", encoding="utf-8") as f:
        f.write(json.dumps(records))

    try:
        # Kept as a read_json round-trip so column dtype inference (and
        # therefore the CSV contents) stays exactly as before.
        data = pd.read_json(jsonfile, encoding="utf-8", orient='records')
        data = data.drop_duplicates(['ts_code', 'trade_date'])

        # NOTE(review): assumes self.lock is a threading.Lock/RLock (the
        # original called self.lock.acquire(1)). The context manager
        # guarantees release even if pandas or the filesystem raises.
        with self.lock:
            if not os.path.exists(filename):
                if len(data) != 0:
                    data.to_csv(filename, index=None)
            elif self.create == 0:
                # DataFrame.append was removed in pandas 2.0; concat is the
                # documented replacement.
                merged = pd.concat([pd.read_csv(filename), data])
                merged = merged.drop_duplicates(['ts_code', 'trade_date'])
                merged.to_csv(filename, index=None)
            else:
                data.to_csv(filename, header=None, mode='a', index=None)
    finally:
        # Always clean up the temp file, including on failure.
        os.remove(jsonfile)
def program(self, create=1, mode='time'):
    """Crawl every stock code over every date window using a thread pool.

    Args:
        create: ``1`` clears the existing data directory and crawls from
            scratch; ``0`` appends to existing data and de-duplicates.
        mode: Forwarded to ``self.__get_namecode__``. ``'time'`` sets the
            date windows by loop count; ``'s_e'`` sets them from a start and
            end date.
    """
    self.__get_namecode__(mode)
    self.create = create

    stock_dir = conf.get('dir', 'stock')
    if create == 1 and os.path.exists(stock_dir):
        shutil.rmtree(stock_dir)
    # Ensure the output directory exists in both modes; previously append
    # mode never created it, so workers failed when it was missing.
    os.makedirs(stock_dir, exist_ok=True)

    url_template = conf.get('config', 'req_url')
    pool = ThreadPool(10)
    # Each entry is (positional_args, keyword_args) for self.get_data.
    param = [
        ([i, code, url_template.format(code, st, et, 'ts_code'), st, et], None)
        for i, code in enumerate(self.ts_code)
        for st, et in self.timerank
    ]
    for request in makeRequests(self.get_data, param):
        pool.putRequest(request)
    pool.wait()
def get_namecode():
    """Download the stock listing, save it as CSV and return it.

    The API token is read from config ``('dir', 'token')`` and the CSV
    destination from ``('file', 'name')``.

    Returns:
        The object returned by ``stock_basic`` (written out via ``to_csv``).
    """
    client = ts.pro_api(conf.get('dir', 'token'))
    listing = client.stock_basic(
        exchange='',
        list_status='L',
        fields='ts_code,symbol,name,area,industry,list_date',
    )
    listing.to_csv(conf.get('file', 'name'), index=None)
    return listing
def __get_timerank__(self, start):
    """Build ``self.timerank`` as date windows walking backwards from *start*.

    Config ``('var', 'for_time')`` gives the number of windows. For each
    window the later bound is the current start minus ``('var', 'backday')``
    days and the earlier bound is that minus ``self.priord``. The earlier
    bound then becomes the start for the next window.

    Args:
        start: ``datetime`` to begin walking backwards from.

    Side effects:
        Sets ``self.timerank`` to a list of ``[earlier, later]`` string
        pairs formatted with config ``('var', 'format')``.
    """
    self.timerank = []
    # These values do not change between iterations, so read them once.
    rounds = int(conf.get('var', 'for_time'))
    backday = datetime.timedelta(days=int(conf.get('var', 'backday')))
    fmt = conf.get('var', 'format')

    for _ in range(rounds):
        st = start - backday
        et = st - self.priord
        self.timerank.append([et.strftime(fmt), st.strftime(fmt)])
        start = et
def __get_namecode__(self):
    """Load the stock listing and prepare the date windows.

    The listing is read from the CSV at config ``('file', 'name')`` when
    that file exists, otherwise fetched through ``token.get_namecode()``.
    Its ``ts_code``, ``name``, ``industry`` and ``area`` columns are stored
    on the instance. ``self.priord`` is set from config
    ``('var', 'timerank')`` (in days) and ``self.__get_timerank__`` is
    called with the current time.
    """
    listing_path = conf.get('file', 'name')
    if not os.path.exists(listing_path):
        listing = token.get_namecode()
    else:
        listing = pd.read_csv(listing_path)

    self.ts_code = listing['ts_code']
    self.name_code = listing['name']
    self.industry = listing['industry']
    self.area = listing['area']

    now = datetime.datetime.now()
    window_days = int(conf.get('var', 'timerank'))
    self.priord = datetime.timedelta(days=window_days)
    self.__get_timerank__(now)
def __startdate_enddate__(self):
    """Build ``self.timerank`` from the configured start and end dates.

    Note the naming: config ``('var', 'startdate')`` is the *later* date
    and ``('var', 'enddate')`` the *earlier* one. Windows are generated
    walking backwards from ``startdate`` until ``enddate`` is reached.
    Each window's later bound is the current position minus
    ``('var', 'backday')`` days, and its earlier bound is that minus
    ``self.priord``, clamped so it never goes before ``enddate``.

    Side effects:
        Sets ``self.timerank`` to a list of ``[earlier, later]`` string
        pairs (formatted with config ``('var', 'format')``), reversed so
        the window generated last comes first.

    Raises:
        ValueError: If windows are needed but ``backday + self.priord`` is
            not positive, which would otherwise loop forever.
    """
    startdate = datetime.datetime.strptime(conf.get('var', 'startdate'), '%Y%m%d')
    enddate = datetime.datetime.strptime(conf.get('var', 'enddate'), '%Y%m%d')
    # Loop-invariant settings, read once.
    backday = datetime.timedelta(days=int(conf.get('var', 'backday')))
    fmt = conf.get('var', 'format')

    if startdate > enddate and backday + self.priord <= datetime.timedelta(0):
        raise ValueError(
            "backday plus the window length must be positive to make progress"
        )

    windows = []
    while startdate > enddate:
        st = startdate - backday
        lower = st - self.priord
        # Clamp the earlier bound to the configured end date.
        et = lower if enddate < lower else enddate
        windows.append([et.strftime(fmt), st.strftime(fmt)])
        startdate = et

    # The original referenced the undefined name `timerank` here, which
    # raised NameError on every call.
    self.timerank = windows[::-1]
def program(self):
    """Recreate the data directory and crawl every stock over every window.

    Loads the stock listing and date windows via ``self.__get_namecode__``,
    wipes and recreates the directory at config ``('dir', 'stock')``, then
    queues one ``self.get_data`` call per (stock, window) pair on a
    ``ThreadPool(10)`` and waits for all of them.
    """
    self.__get_namecode__()

    stock_dir = conf.get('dir', 'stock')
    if os.path.exists(stock_dir):
        shutil.rmtree(stock_dir)
    os.mkdir(stock_dir)

    pool = ThreadPool(10)
    # Each entry is (positional_args, keyword_args) for self.get_data.
    param = [
        (
            [
                idx,
                ts_code,
                conf.get('config', 'req_url').format(ts_code, begin, end, 'ts_code'),
                begin,
                end,
            ],
            None,
        )
        for idx, ts_code in enumerate(self.ts_code)
        for begin, end in self.timerank
    ]
    for work in makeRequests(self.get_data, param):
        pool.putRequest(work)
    pool.wait()
from py2neo import Graph, Node, Relationship, Subgraph from Config.base import conf, variable graph = Graph(host=conf.get('neo4j', 'server'), username=conf.get('neo4j', 'user'), password=conf.get('neo4j', 'pwd')) import inspect graph.run('match (n) detach delete n;') tx = graph.begin() # ### 增加 # # 可以一个一个创建 # a = Node('Person',name='bubu') # graph.create(a) # b = Node('Person',name='kaka') # graph.create(b) # r = Relationship(a,'KNOWS',b) # graph.create(r) # # 也可以一次性创建 # s = a | b | r # graph.create(s) data = { 'product': [{ 'id': '0', 'name': '重磅真丝衬衫纯色女长袖名媛优雅桑蚕丝弹力绸缎上衣', 'pid': '221725', 'price': 129, 'main_pic': 'FsHngvAP3aoUeeJbqrcCzR6qBh5V',