Пример #1
0
    def set_components_data(self, cdate = None):
        """Download the component list of index ``self.code`` and store it for ``cdate``.

        The sheet is read with ``pd.read_excel`` from the URL configured in
        ``self.INDEX_URLS[self.code]``, normalised, written through
        ``self.mysql_client.set`` and finally ``cdate`` is recorded in the
        redis set named after the table.

        Args:
            cdate: date string in ``%Y-%m-%d`` format; defaults to the current
                day, resolved at call time.

        Returns:
            True when the date was already stored or has just been stored and
            recorded in redis, False on any failure.
        """
        if cdate is None:
            # Resolve "today" on every call: a default written in the signature
            # is evaluated only once, when the function is defined.
            cdate = datetime.now().strftime('%Y-%m-%d')

        table_name = self.get_components_table_name(cdate)
        if not self.is_table_exists(table_name):
            if not self.create_components_table(table_name):
                logger.error("create components table failed")
                return False

        if self.is_date_exists(table_name, cdate):
            logger.debug("existed table:%s, date:%s" % (table_name, cdate))
            return True

        # Each INDEX_URLS entry is indexed as (url, excel columns to read, column names).
        index_info   = self.INDEX_URLS[self.code]
        url          = index_info[0]
        columns      = index_info[1]
        column_names = index_info[2]
        df = smart_get(pd.read_excel, url, usecols = columns)
        # An empty sheet is treated like a failed download; it would otherwise
        # raise ZeroDivisionError when the default weight is computed below.
        if df is None or df.empty:
            logger.error("data for %s is empty" % self.code)
            return False
        df.columns   = column_names
        # Codes are stored as 6-character, zero-padded strings.
        df['code']   = df['code'].astype('str').str.zfill(6)
        df['date']   = cdate
        # Only fall back to equal weights when the sheet supplies no weight column.
        if 'weight' not in df.columns:
            df['weight'] = 1/len(df)
        if 'flag' not in df.columns:
            df['flag']   = 1
        df = df.reset_index(drop = True)

        if is_df_has_unexpected_data(df):
            logger.error("data for %s is not clear" % self.code)
            return False

        if self.mysql_client.set(df, table_name):
            # Mark the date as stored only after the database write succeeded.
            if self.redis.sadd(table_name, cdate): return True
        return False
Пример #2
0
    def set_data(self, cdate=None):
        """Crawl the margin data of ``cdate`` and store it.

        The frames returned by ``self.crawler.margin`` and
        ``self.crawler.margin_detail`` are renamed to a common ``date`` /
        ``code`` layout, stacked, written through ``self.mysql_client.set``
        and ``cdate`` is then recorded in the redis set named after the table.

        Args:
            cdate: date string in ``%Y-%m-%d`` format; defaults to the current
                day, resolved at call time.

        Returns:
            True if the date was already stored, False on failure, otherwise
            the result of ``self.redis.sadd(table_name, cdate)``.
        """
        if cdate is None:
            # Resolve "today" on every call: a default written in the signature
            # is evaluated only once, when the function is defined.
            cdate = datetime.now().strftime('%Y-%m-%d')

        table_name = self.get_table_name(cdate)
        if not self.is_table_exists(table_name):
            if not self.create_table(table_name):
                self.logger.error("create tick table failed")
                return False
            # Register the freshly created table under the database name.
            self.redis.sadd(self.dbname, table_name)

        if self.is_date_exists(table_name, cdate):
            self.logger.debug("existed table:%s, date:%s" %
                              (table_name, cdate))
            return True

        trade_date = transfer_date_string_to_int(cdate)

        total_df = smart_get(self.crawler.margin, trade_date=trade_date)
        if total_df is None:
            self.logger.error("crawel margin for %s failed" % cdate)
            return False

        total_df = total_df.rename(columns={
            "trade_date": "date",
            "exchange_id": "code"
        })
        # Zero-fill 'rqyl' and 'rqchl' on the summary rows; presumably only the
        # detail rows carry these columns -- TODO confirm against the crawler.
        total_df['rqyl'] = 0
        total_df['rqchl'] = 0

        detail_df = smart_get(self.crawler.margin_detail,
                              trade_date=trade_date)
        if detail_df is None:
            self.logger.error("crawel detail margin for %s failed" % cdate)
            return False

        detail_df = detail_df.rename(columns={
            "trade_date": "date",
            "ts_code": "code"
        })

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        total_df = pd.concat([total_df, detail_df], sort=False)
        total_df['date'] = pd.to_datetime(
            total_df.date).dt.strftime("%Y-%m-%d")
        total_df = total_df.reset_index(drop=True)
        if self.mysql_client.set(total_df, table_name):
            # NOTE(review): the one second pause is kept from the original;
            # its purpose is not visible here.
            time.sleep(1)
            return self.redis.sadd(table_name, cdate)
        return False
Пример #3
0
 def init(self):
     """Fetch the stock basics table and cache it, pickled, in redis.

     Returns:
         False when ``smart_get`` yields None, otherwise the result of
         ``self.redis.set`` for the ``ct.STOCK_INFO`` key.
     """
     basics = smart_get(ts.get_stock_basics)
     if basics is None:
         return False
     # Turn the index into an ordinary column before serialising.
     flattened = basics.reset_index(drop = False)
     # Pickle protocol 2, as in the stored format used so far.
     return self.redis.set(ct.STOCK_INFO, _pickle.dumps(flattened, 2))
Пример #4
0
 def get_data_from_url(self, cdate = None):
     """Fetch the exchange summary of ``cdate`` for ``self.market``.

     Args:
         cdate: date string in ``%Y-%m-%d`` format; defaults to the current
             day, resolved at call time.

     Returns:
         A DataFrame with one row per market segment and the columns name,
         date, amount, number, negotiable_value, market_value, pe, totals,
         outstanding, volume, transactions and turnover. An empty DataFrame
         is returned when a download or parsing step fails.
     """
     if cdate is None:
         # Resolve "today" on every call: a default written in the signature
         # is evaluated only once, when the function is defined.
         cdate = datetime.now().strftime('%Y-%m-%d')
     if self.market == ct.SH_MARKET_SYMBOL:
         return self._fetch_sh_exchange_data(cdate)
     return self._fetch_sz_exchange_data(cdate)

 def _fetch_sh_exchange_data(self, cdate):
     """Build the summary rows from the JSONP endpoint used for the SH market."""
     def to_float(raw):
         # The endpoint reports missing figures as empty strings.
         return 0 if raw == '' else float(raw)

     url = self.get_url() % (int_random(5), cdate, int(round(time.time() * 1000)))
     response = smart_get(requests.get, url, headers=self.header)
     # Other callers in this file treat a None result of smart_get as failure,
     # so guard before touching the response.
     if response is None:
         self.logger.error("get exchange data failed, no response for date:%s" % cdate)
         return pd.DataFrame()
     if response.status_code != 200:
         self.logger.error("get exchange data failed, response code:%s" % response.status_code)
         return pd.DataFrame()
     json_result = loads_jsonp(response.text)
     if json_result is None:
         self.logger.error("parse exchange data jsonp failed")
         return pd.DataFrame()

     datas = list()
     for json_obj in json_result['result']:
         name = self.get_sh_type_name(json_obj['productType'])
         if name is None:
             self.logger.error("get unknown type for SH data:%s" % json_obj['productType'])
             return pd.DataFrame()
         if name == "科创板":
             # Rows of this product type are not collected.
             continue
         volume      = to_float(json_obj['trdVol'])
         turnover    = to_float(json_obj['exchangeRate'])
         # Outstanding shares are derived from volume and turnover rate.
         outstanding = 0 if turnover == 0 else volume / (100 * turnover)
         data = {
             'amount': to_float(json_obj['trdAmt']),
             'number': 0 if json_obj['istVol'] == '' else int(json_obj['istVol']),
             'negotiable_value': to_float(json_obj['negotiableValue']),
             'market_value': to_float(json_obj['marketValue']),
             'pe': to_float(json_obj['profitRate']),
             'totals': outstanding,
             'outstanding': outstanding,
             'volume': volume,
             'transactions': to_float(json_obj['trdTm']),
             'turnover': turnover,
         }
         # Rows whose figures are all zero carry no information and are dropped.
         if any(data.values()):
             data['name'] = name
             data['date'] = cdate
             datas.append(data)
     return pd.DataFrame.from_dict(datas)

 def _fetch_sz_exchange_data(self, cdate):
     """Build the summary rows from the per-tab Excel sheets in ``ct.SZ_MARKET_DICT``."""
     yi  = 100000000  # divisor applied to money and share counts
     wan = 10000      # divisor applied to the transaction count

     def metric(sheet, label, unit = 1):
         # Look up the value of the row whose indicator name equals ``label``.
         # Going through str() accepts both numeric cells and strings with
         # thousands separators.
         raw = sheet.loc[sheet['指标名称'] == label, '本日数值'].values[0]
         return float(str(raw).replace(',', '')) / unit

     datas = list()
     for name, tab in ct.SZ_MARKET_DICT.items():
         url = self.get_url() % (tab, cdate, float_random(17))
         sheet = smart_get(pd.read_excel, url, usecols = [0, 1])
         if sheet is None: return pd.DataFrame()
         if sheet.empty: continue
         # A single cell with this message means the exchange has no data for the query.
         if len(sheet) == 1 and sheet.values[0][0] == '没有找到符合条件的数据!': continue
         if name == "深圳市场":
             # amount, volume and transactions of the whole-market row are
             # filled in below from the other rows.
             amount           = 0
             number           = int(metric(sheet, '上市公司数'))
             negotiable_value = metric(sheet, '股票流通市值(元)', yi)
             market_value     = metric(sheet, '股票总市值(元)', yi)
             pe               = metric(sheet, '股票平均市盈率')
             totals           = metric(sheet, '股票总股本(股)', yi)
             outstanding      = metric(sheet, '股票流通股本(股)', yi)
             volume           = 0
             transactions     = 0
             turnover         = metric(sheet, '股票平均换手率')
         else:
             amount           = metric(sheet, '总成交金额(元)', yi)
             number           = int(metric(sheet, '上市公司数'))
             negotiable_value = metric(sheet, '上市公司流通市值(元)', yi)
             market_value     = metric(sheet, '上市公司市价总值(元)', yi)
             pe               = metric(sheet, '平均市盈率(倍)')
             totals           = metric(sheet, '总发行股本(股)', yi)
             outstanding      = metric(sheet, '总流通股本(股)', yi)
             volume           = metric(sheet, '总成交股数', yi)
             transactions     = metric(sheet, '总成交笔数', wan)
             # Guard against a zero share count instead of raising ZeroDivisionError.
             turnover         = 0 if outstanding == 0 else 100 * volume / outstanding
         datas.append({
             'name': name,
             'date': cdate,
             'amount': amount,
             'number': number,
             'negotiable_value': negotiable_value,
             'market_value': market_value,
             'pe': pe,
             'totals': totals,
             'outstanding': outstanding,
             'volume': volume,
             'transactions': transactions,
             'turnover': turnover,
         })

     df = pd.DataFrame.from_dict(datas)
     if not df.empty:
         whole_market = df.name == "深圳市场"
         if whole_market.any():
             # .loc is the accessor that takes a boolean mask; .at only
             # addresses a single scalar label.
             for column in ('amount', 'volume', 'transactions'):
                 df.loc[whole_market, column] = df[column].sum() - df.loc[whole_market, column]
     return df
Пример #5
0
    def process(self, req_date):
        """Drive the browser to the shareholding page of ``req_date``.

        Loads ``self.link``, opens the date picker, clicks the year, month and
        day buttons and then the search button.

        Args:
            req_date: date string made of three '-' separated parts
                (year, month, day).

        Returns:
            ``self.driver.page_source`` after the search click, or None as
            soon as any step fails (the failure is logged).
        """
        year, month, day = req_date.split('-')

        def locate_and_click(finder, locator, find_error, click_error):
            # Find one element and click it; log and report failure otherwise.
            target = smart_get(finder, locator)
            if target is None:
                self.logger.error(find_error % self.link)
                return False
            if not self.smart_call(target.click, None):
                self.logger.error(click_error % self.link)
                return False
            return True

        if not self.smart_call(self.driver.set_page_load_timeout, None, 30):
            self.logger.error("set page load timeout failed.")
            return None

        if not self.smart_call(self.driver.get, None, self.link):
            self.logger.error("%s get source page failed." % self.link)
            return None

        # Open the date picker.
        if not locate_and_click(
                self.driver.find_element_by_id,
                'txtShareholdingDate',
                "%s find txtShareholdingDate element by xpath failed.",
                "%s get txtShareholdingDate element by xpath failed."):
            return None

        if not locate_and_click(
                self.driver.find_element_by_xpath,
                "//b[@class='year']//button[@data-value=%s]" % year,
                "%s find year element by xpath failed.",
                "%s get find year element by xpath failed."):
            return None

        # The month buttons are addressed with the month number minus one.
        if not locate_and_click(
                self.driver.find_element_by_xpath,
                "//b[@class='month']//button[@data-value=%s]" % (int(month) - 1),
                "%s find month element by xpath failed.",
                "%s get month element by xpath failed."):
            return None

        # int() strips a leading zero from the day part.
        if not locate_and_click(
                self.driver.find_element_by_xpath,
                "//b[@class='day']//button[@data-value=%s]" % int(day),
                "%s find day element by xpath failed.",
                "%s get day element by xpath failed."):
            return None

        if not locate_and_click(
                self.driver.find_element_by_name,
                "btnSearch",
                "%s find search button by xpath failed.",
                "%s search button result by xpath failed."):
            return None

        return self.driver.page_source