# Example #1
class Money_link():
    """Parse the tick table of one stock from the page at ``MONEYLINKURL``.

    Rows are parsed and returned only; this class never writes to MongoDB.
    The Mongo handle is used solely to leave out rows whose ``_id`` is
    already present in the ``Transaction_details`` collection.
    """

    def __init__(self):
        self.mongo = MongodbAPI()

    def start(self, stock_num: str) -> list:
        """Build the page URL for ``stock_num`` and return its parsed rows."""
        source_url = MONEYLINKURL % (stock_num)
        return self.parser(stock_num, source_url)

    @staticmethod
    def _signed_change(cell_text: str) -> str:
        """Turn a change cell such as ``"▼ 1.5"`` into a signed numeric string.

        A cell without a space-separated marker is reported as ``"0.0"``.

        Raises:
            ValueError: if the marker is neither ``▲`` nor ``▼``.
        """
        parts = cell_text.split(" ")
        if len(parts) < 2:
            return "0.0"
        if parts[0] == "▼":
            return "-" + parts[1]
        if parts[0] == "▲":
            return parts[1]
        # Same exception type an unparsable value produced before, but with a
        # message that names the offending cell.
        raise ValueError("unknown change marker in %r" % (cell_text,))

    def parser(self, stock_num, url) -> list:
        """Fetch ``url`` and convert each table row into a record dict.

        Args:
            stock_num: Stock code, used in ``_id`` and the ``stock`` field.
            url: Page to download through ``HtmlRequests.get_html``.

        Returns:
            A list of dicts, one per row that is complete, has both a buying
            and a selling price, and is not yet stored. Empty when the page
            could not be fetched.
        """
        daily = []
        htmlparser = HtmlRequests()
        tree = htmlparser.get_html(url)
        if tree is None:
            return daily
        now = datetime.now()
        for row in tree.xpath('//div[@id="TickHeight"]/table/tr'):
            cells = [row.xpath('td[%d]/text()' % col) for col in range(1, 7)]
            # Rows lacking any of the six text cells cannot be indexed with
            # [0]; skip them instead of raising IndexError.
            if not all(cells):
                continue
            tick_time, buying, selling, transaction, change, stock_volume = (
                cell[0] for cell in cells)
            if buying == '--' or selling == '--':
                continue
            ups_and_downs = self._signed_change(change)
            hour, minute, second = (
                int(part) for part in tick_time.split(':')[:3])
            # The page only carries a time of day, so today's date is used.
            date = datetime(now.year, now.month, now.day,
                            hour, minute, second)
            _id = str(stock_num) + "@" + date.isoformat()
            if self.mongo.CheckExists("Transaction_details", _id):
                continue
            daily.append({
                '_id': _id,
                'ts': int(date.timestamp()),
                'stock': stock_num,
                'date': date,
                'buying': float(buying),
                'selling': float(selling),
                'transaction': float(transaction),
                'ups_and_downs': float(ups_and_downs),
                'stock_volume': int(stock_volume)
            })
        return daily
# Example #2
class TWSE_realtime():
    """Poll ``TWSEREALTIMEURL`` for one stock and store each new quote.

    ``crawl`` re-arms itself with a 5 second timer for as long as the local
    clock is before ``stop_date`` (13:30:10 on the day the object was built).
    """

    def __init__(self, stock_num):
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        now = datetime.now()
        self.stop_date = datetime(now.year, now.month, now.day, 13, 30, 10)

    def start(self):
        """Run the first poll; ``crawl`` schedules the following ones."""
        # NOTE(review): assumed entry point is a plain crawl() call - confirm.
        self.crawl()

    def crawl(self):
        """Fetch one quote, parse it and insert it unless already stored."""
        # Schedule the next poll first so a failure below does not stop the
        # polling chain.
        if datetime.now() < self.stop_date:
            threading.Timer(5.0, self.crawl).start()
        now_time = int(time.time()) * 1000
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time=now_time)
        json_data = self.htmlreq.get_json(self.req, source_url)
        data = self.parser(json_data)
        if data is None:
            return
        exists = self.mongo.CheckExists('Realtime_data', data.get('_id', None))
        # Only an explicit False triggers an insert; what CheckExists returns
        # on failure is not visible here, so the comparison is kept as-is.
        if exists == False:
            for _ in range(5):
                if self.mongo.Insert_Data_To("Realtime_data", data):
                    logging.info("Insert realtime data to mongo, id:%s",
                                 data.get("_id"))
                    break
                logging.error(
                    "Fail to insert realtime data to mongo, id:%s",
                    data.get("_id"))

    @staticmethod
    def _to_float(value):
        """Return ``value`` as a float, or None if missing or non-numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def parser(self, j: dict):
        """Convert the first entry of ``j['msgArray']`` into a record dict.

        Args:
            j: Decoded JSON payload.

        Returns:
            The record dict, or None when the payload is empty or has no
            ``msgArray`` entries. Numeric fields that are absent or not
            parsable are stored as None; absent best bid/ask lists are [].
        """
        if not j or not j.get('msgArray'):
            return None
        data = j['msgArray'][0]

        def _split_best(d):
            # Values look like "a_b_c_"; nothing to split yields an empty list.
            if d:
                return d.strip('_').split('_')
            return []

        # 'tlong' is in milliseconds; the record keeps whole seconds only.
        date = datetime.fromtimestamp(
            int(data['tlong']) / 1000).replace(microsecond=0)
        # Named 'stamp' so the 'time' module stays usable for mktime below.
        stamp = date.strftime('%Y-%m-%d %H:%M:%S')
        to_float = self._to_float
        return {
            "_id": str(self.stock_num) + "@" + stamp,
            "code": self.stock_num,
            'ts': int(time.mktime(date.timetuple())),
            "time": date,
            "latest_trade_price": to_float(data.get('z', None)),
            "trade_volume": to_float(data.get('tv', None)),
            "accumulate_trade_volume": to_float(data.get('v', None)),
            "best_bid_price": [to_float(x) for x in _split_best(data.get('b', None))],
            "best_bid_volume": [to_float(x) for x in _split_best(data.get('g', None))],
            "best_ask_price": [to_float(x) for x in _split_best(data.get('a', None))],
            "best_ask_volume": [to_float(x) for x in _split_best(data.get('f', None))],
            "open": to_float(data.get('o', None)),
            "high": to_float(data.get('h', None)),
            "low": to_float(data.get('l', None))
        }
# Example #3
class TWSE_daily():
    """Back-fill monthly daily-quote data for one stock into ``Daily_data``.

    Starting at ``start_year``/``start_month`` the crawler downloads one
    month at a time and moves forward until the next month would lie after
    the month in which the object was created.
    """

    def __init__(self, stock_num: str, start_year: int, start_month: int):
        self.mongo = MongodbAPI()
        self.stock_num = stock_num
        self.htmlreq = HtmlRequests()
        self.req = self.htmlreq.get_session(SESSIONURL)
        self.req.keep_alive = False
        self.start_year = start_year
        self.start_month = start_month
        self.now_date = datetime.now()
        self.retry = 0

    def start(self):
        """Crawl from the configured start month onwards."""
        self.crawl(self.start_year, self.start_month)

    def crawl(self, year, month):
        """Crawl ``year``/``month`` and every following month up to now.

        The given month is always processed. Iterating instead of recursing
        keeps the call stack flat regardless of how many months are covered.
        """
        while True:
            self._crawl_month(year, month)
            date = self._get_next_date(year, month)
            year, month = date['year'], date['month']
            if self._is_after_now(year, month):
                logging.info("Done crawl Daily data , %s@%s/%s",
                             self.stock_num, year, month)
                return

    def _is_after_now(self, year, month) -> bool:
        """Tell whether ``year``/``month`` lies after ``now_date``'s month."""
        # Comparing (year, month) as a pair also terminates when the current
        # month is December, which a separate month > 12 test never could.
        return (year, month) > (self.now_date.year, self.now_date.month)

    def _fetch_month(self, source_url, year, month):
        """Download one month, retrying up to 5 times on an empty ``{}``."""
        self.retry = 0
        json_data = self.htmlreq.get_json(self.req, source_url)
        while json_data == {} and self.retry < 5:
            self.retry += 1
            json_data = self.htmlreq.get_json(self.req, source_url)
        if json_data == {}:
            logging.error("Can't get old daily stock %s@%s-%s ,url : %s ",
                          self.stock_num, year, month, source_url)
        self.retry = 0
        return json_data

    def _crawl_month(self, year, month):
        """Fetch, parse and store the rows of a single month."""
        logging.debug("%s/%s", year, month)
        # NOTE(review): a daily crawler formatting TWSEREALTIMEURL looks
        # suspicious - confirm this is the intended URL constant.
        source_url = TWSEREALTIMEURL.format(
            stock_num=self.stock_num, time="%d%02d01" % (year, month))
        json_data = self._fetch_month(source_url, year, month)
        data = self.parser((json_data or {}).get('data', None))

        if data:
            for _ in range(5):
                # Only a literal True counts as success, as before.
                if self.mongo.Insert_Many_Data_To("Daily_data", data) == True:
                    logging.info("Insert Daily data %s@%s-%s",
                                 self.stock_num, year, month)
                    break
                logging.error("Fail, Insert Daily data to Mongo,id: %s@%s-%s",
                              self.stock_num, year, month)

    def _convert_date(self, date):
        """Convert '106/05/01' to '2017/05/01'"""
        parts = date.split('/')
        return '/'.join([str(int(parts[0]) + 1911)] + parts[1:])

    def parser(self, j: list) -> list:
        """Turn the ``data`` rows of a response into record dicts.

        Args:
            j: Iterable of rows, or None. Each row is indexed 0..8 as date,
                capacity, turnover, open, high, low, close, change,
                transaction.

        Returns:
            Records for rows not yet present in ``Daily_data``. Rows that
            fail to convert are logged and skipped.
        """
        data = []
        if j is None:
            return data
        for item in j:
            try:
                date = datetime.strptime(
                    self._convert_date(item[0]), '%Y/%m/%d')
                _id = self.stock_num + "@" + date.strftime("%Y/%m/%d")

                if self.mongo.CheckExists("Daily_data", _id):
                    logging.debug("Insert Daily data ,id :%s exists", _id)
                    continue
                data.append({
                    '_id': _id,
                    'stock': self.stock_num,
                    'date': date,
                    'ts': int(time.mktime(date.timetuple())),
                    'capacity': int(item[1].replace(',', '')),
                    'turnover': int(item[2].replace(',', '')),
                    'open': self._get_float(item[3]),
                    'high': self._get_float(item[4]),
                    'low': self._get_float(item[5]),
                    'close':  self._get_float(item[6]),
                    'change':  self._get_float(item[7]),
                    'transaction': int(item[8].replace(',', ''))
                })
            except Exception as e:
                logging.error("daily data fail :%s %s" % (item, e))
        return data

    def _get_next_date(self, year, month) -> dict:
        """Return the month following ``year``/``month`` as a dict."""
        if month < 12:
            month += 1
        else:
            year += 1
            month = 1
        return {
            'year': year,
            'month': month
        }

    def _get_float(self, number: str):
        """Parse a quote cell: 'X0.00' -> 0.0, '--' -> None, else a float."""
        if number.replace(',', '') == 'X0.00':
            return 0.0
        elif number == '--':
            return None
        else:
            return float(number.replace(',', ''))