def _run(self, code, start, end):
    ss = requests.session()
    code_d = 'sh' + code if code[0] == '6' else 'sz' + code
    f_data_list = []
    # fetch the list of report dates for this stock
    url = 'http://emweb.securities.eastmoney.com/PC_HSF10/ShareholderResearch/ShareholderResearchAjax?code=' + code_d
    ctx = ss.get(url=url, headers=header_dict)
    if ctx.content is not None:
        f_data_list = json.loads(ctx.content)['zlcc_rz']
    logger.info('funds date:' + ",".join(f_data_list))
    df = pd.DataFrame()
    for idx, f_d in enumerate(f_data_list):
        url = ('http://emweb.securities.eastmoney.com/PC_HSF10/ShareholderResearch/MainPositionsHodlerAjax?date='
               + str(f_d) + '&code=' + code_d)
        logger.debug(url)
        ctx = ss.get(url=url, headers=header_dict)
        df.loc[idx, 'code'] = code
        df.loc[idx, 'fin_year'] = DateUtil.getYear(f_d)
        df.loc[idx, 'fin_season'] = DateUtil.getSeason(f_d)
        df.loc[idx, 'fin_type'] = 1
        if ctx.content is not None:
            data_list = json.loads(ctx.content)
            for d in data_list:
                if d['jglx'] == u'基金':  # institution type: fund
                    df.loc[idx, 'fund_holding'] = StrUtil.parse_field(d['zltgbl'])
                if d['jglx'] == u'社保基金':  # institution type: social security fund
                    df.loc[idx, 'sb_holding'] = StrUtil.parse_field(d['zltgbl'])
    return df
def _run(self, dt=None):
    df = self.__get_data(dt)
    if df.empty:
        logger.info('pull hk but no data')
    return df
def execute(self, sql):
    # whenever a database connection is needed, borrow one from the pool
    conn = self.get_conn()
    cur = conn.cursor()
    try:
        logger.info('execute sql:' + sql)
        cur.execute(sql)
        conn.commit()
    except BaseException as be:
        logger.exception('execute sql error: %s', be)
    finally:
        cur.close()
        conn.close()
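# Usage sketch (assumption): execute() runs a single ad-hoc statement on a pooled
# connection and commits it, so it suits one-off DDL. The statement, table and column
# below are hypothetical; only the `pm` singleton (from k.util.PandasToMysql, imported
# in job_every_day further down) comes from this project.
from k.util.PandasToMysql import pm

pm.execute("ALTER TABLE share_data_day ADD COLUMN pe_rank DOUBLE")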
def save(self, table_name, df, primaryKeys=[Config.code, Config.db_date]):
    if df.empty:
        logger.info('no data to save')
        return
    # split rows into those that already exist (update) and those that do not (insert)
    exist = []
    for i in df.index:
        exist.append(self.__query_exist(table_name, df, i, primaryKeys))
    not_exist = list(map(lambda x: not x, exist))
    update_df = df[exist]
    insert_df = df[not_exist]
    logger.info('save db count:%d,insert %d,update:%d' % (df.shape[0], insert_df.shape[0], update_df.shape[0]))
    self.__insert(table_name, insert_df)
    self.update(table_name, update_df, primaryKeys)
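# Usage sketch (assumption): save() acts as an upsert keyed on primaryKeys, which
# default to (Config.code, Config.db_date). The table name, columns and values below
# are made up purely for illustration.
import pandas as pd
from k.util.PandasToMysql import pm

demo = pd.DataFrame([{'code': '600000', 'trade_date': '2020-01-02', 'close': 12.3}])
pm.save('share_data_day', demo)  # rows that already exist are updated, the rest inserted
pm.save('share_data_day', demo, primaryKeys=['code', 'trade_date'])  # explicit key columns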
def job_every_day():
    from k.manager.FinManager import FinManager
    from k.puller.SharePuller import SharePuller
    from k.manager.Kmanager import KManager
    from k.util.Logger import logger
    from k.util.DateUtil import DateUtil
    from k.util.PandasToMysql import pm
    if isWeekDay():
        return
    td = datetime.datetime.now().strftime('%Y-%m-%d')
    logger.info('schedule job start to work:' + td)
    KManager.pull_data(td)
    KManager.pull_data_hk(td)
    KManager.pull_data_hk(DateUtil.getLastDay(td))
    KManager.count_kpi(td)
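# Wiring sketch (assumption): job_every_day() is presumably fired by a scheduler. One
# minimal way to run it daily is the third-party `schedule` package; the 18:00 trigger
# time is an arbitrary example and not taken from this project.
import time
import schedule

schedule.every().day.at("18:00").do(job_every_day)
while True:
    schedule.run_pending()
    time.sleep(60)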
def pull(self, dt=None, to_mysql=True, to_csv=False):
    succ = True
    df = None
    try:
        df = self._run(dt)
        if df.empty:
            logger.info('get empty df for ' + self.__class__.__name__)
        else:
            if to_mysql:
                self._save_to_mysql(pm, df)
            if to_csv:
                self._save_to_csv(df)
    except Exception:
        logger.exception('tbase puller error')
        succ = False
    digest_log.info('t-puller|%s|%s|%s|%s' % (self.__class__.__name__, dt, str(to_mysql), succ))
    return succ
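# Usage sketch (assumption): concrete pullers such as SharePuller (imported in
# job_every_day above) subclass this base and override _run(); pull() then handles
# persistence and digest logging. The date below is arbitrary.
from k.puller.SharePuller import SharePuller

ok = SharePuller().pull(dt='2020-01-02', to_mysql=False, to_csv=True)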
def __init__(self, host=ConfigDict['ip'], user=ConfigDict['user'],
             password=ConfigDict['password'], db_name=ConfigDict['db_name']):
    try:
        logger.info('start to build pool')
        self.pool = mysql.connector.pooling.MySQLConnectionPool(pool_name="mypool",
                                                                pool_size=10,
                                                                host=host,
                                                                port=3306,
                                                                database=db_name,
                                                                user=user,
                                                                password=password,
                                                                pool_reset_session=True)
        logger.info('build pool OK')
    except BaseException as be:
        logger.exception('build error')
        raise be
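# Sketch (assumption): the get_conn() used by execute() and update() in this class is
# not shown here; with mysql.connector pooling it would simply borrow a connection
# from the pool, and calling close() on that connection returns it to the pool.
def get_conn(self):
    return self.pool.get_connection()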
def update(self, table_name, df, primaryKeys):
    if df.empty:
        logger.info('no data to update')
        return
    if primaryKeys == '' or primaryKeys is None:
        raise Exception('primaryKeys is None')
    conn = self.get_conn()
    cur = conn.cursor()
    try:
        sql_prefix = 'update ' + table_name + ' set '
        for i in df.index:
            # SET clause: every column that is not part of the primary key
            set_parts = []
            for val in df.columns:
                if val not in primaryKeys:
                    set_parts.append(self.__parse_field__(val) + '=' + self.__parse_value__(df.loc[i, val]))
            if not set_parts:
                raise Exception('updated columns is None')
            # WHERE clause: all primary-key columns
            where_parts = [self.__parse_field__(val) + '=' + self.__parse_value__(df.loc[i, val])
                           for val in primaryKeys]
            sql = sql_prefix + ','.join(set_parts) + ' where ' + ' and '.join(where_parts)
            cur.execute(sql)
        logger.debug('update sql count:' + str(df.shape[0]))
        conn.commit()
    except BaseException as be:
        conn.rollback()
        logger.exception('update db error')
        raise be
    finally:
        cur.close()
        conn.close()
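# For reference, given a one-row DataFrame with columns (code, trade_date, close, ma5)
# and primaryKeys=['code', 'trade_date'], update() builds one statement per row of the
# form below (exact quoting depends on __parse_field__/__parse_value__; the table name
# is hypothetical):
#
#   update share_data_day set close=12.3,ma5=11.8 where code='600000' and trade_date='2020-01-02'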
def kpi_month(code):
    import numpy as np
    df = pm.query(DbCreator.share_data_month, where='code=\'' + code + '\' and trade_date>=\'2015-01\'')
    logger.info('start to month reverse kpi:' + code + ' size:' + str(df.shape[0]))
    if df.shape[0] == 0:
        logger.info('no data to kpi:' + code)
        return
    df.sort_values(by=[Config.db_date], inplace=True)  # sort by trade date
    df.index = np.arange(0, df.shape[0], 1)  # rebuild a contiguous index after sorting
    df[Config.db_date] = df[Config.db_date].astype(str)  # keep the date column as plain strings
    mk = MonthReverse()
    succ = mk.run(code, df, start=0, to_mysql=True)
    logger.info('end to month kpi:' + code)
    return succ
def kpi_s(code, start_date, pm, dict):
    df = pm.query(DbCreator.share_data_day, where='code=\'' + code + '\' and trade_date>=\'2011-01-01\'')
    logger.info('start to kpi:' + code + ' size:' + str(df.shape[0]))
    if df.shape[0] == 0:
        logger.info('no data to kpi:' + code)
        return
    # pipeline: preprocess first, then run each indicator stage in order
    succ = Pipeline.execute(PreProcess(),
                            [TotalMarket(), CreateRecord(), DeviationsRatio(), FluctionRatio(),
                             IncrementRatio(), Ma(), TurnRatio(), PeRank()],
                            code, df, start_date)
    logger.info('end to kpi:' + code)
    dict[code] = succ
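# Fan-out sketch (assumption): the `dict` parameter suggests kpi_s() is run per stock
# code in worker processes that write their success flag into a shared mapping. A
# minimal standard-library version; the codes, start date and pool size are arbitrary,
# and in practice each worker may need its own PandasToMysql instance because pooled
# connections do not pickle.
from multiprocessing import Manager, Pool

codes = ['600000', '000001']
with Manager() as m:
    results = m.dict()
    with Pool(processes=4) as p:
        p.starmap(kpi_s, [(c, '2011-01-01', pm, results) for c in codes])
    print(dict(results))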
def run(self, code, df, start=0, to_mysql=False, to_csv=False):
    succ = True
    try:
        if df.empty or df.shape[0] == 0:
            logger.info('no data to count kpi')
        else:
            df = self._process(df, start)
            if to_mysql:
                df = df.loc[start:, :]
                tb_name = self._get_update_table_name()
                pm.update(tb_name, df, primaryKeys=[Config.id])
            if to_csv:
                df.to_csv(code + '-base.csv')
    except Exception:
        logger.exception('base kpi error')
        succ = False
    digest_log.info('Alg-Base|%s|%s|%d|%s|%s' % (self.__class__.__name__, code, start, to_mysql, succ))
    return succ