def main(self):
    """Crawl every configured page of equity block trades and store the rows.

    For each page number in ``1..self._crawl_pages`` the URL is built from
    ``self._base_url`` and ``self._query_string``, handed to
    ``self.parse_web`` and each parsed row is turned into a document that
    is written through ``self._coll_in``.

    A row whose date compares greater than ``self._latest_date`` is always
    inserted; any other row is inserted only when no document with the same
    ``uuid`` is found.

    ``self._coll_in`` is disconnected when the crawl ends, and also when a
    page raises part-way through, so the connection is not left open.
    """
    def scale(raw):
        # Strip the raw text, multiply by 10000 and render with two decimals.
        return '{0:.2f}'.format(float(raw.strip()) * 10000)

    pages = self._crawl_pages
    try:
        for page in range(1, pages + 1):
            url = self._base_url + self._query_string.format(page)
            # parse_web yields three sequences that are indexed in parallel.
            dt, code_price_volu_amou, buy_sale = self.parse_web(url)

            for i, trade_date in enumerate(dt):
                fields = code_price_volu_amou[i]  # code, price, volume, amount
                parties = buy_sale[i]             # buy side, sale side

                # Note: this crawler covers securities (stock and fund).
                # True  -> insert every row of this date;
                # False -> insert only rows that are not stored yet.
                rp_flag = self._latest_date < trade_date

                secu, curr = secu_currency(fields[0], 'stock', 'fund')

                clo_price = close_price(secu, trade_date, 'mysql')
                if clo_price is None:
                    disc = ''
                else:
                    # NOTE(review): this subtracts the close price and 1 from
                    # the trade price. If 'disc' is meant to be a ratio the
                    # usual form would be price / close - 1 -- confirm before
                    # changing stored values.
                    disc = '%.4f' % (float(fields[1]) - float(clo_price) - 1)

                total = total_equity(secu, trade_date, 'vary', 'stock')
                volu = float(scale(fields[2]))
                ratio = '' if total is None else ('%.4f' % ((volu / total) * 100))

                # Deterministic id derived from every raw field of the row.
                name = ''.join([trade_date] + fields + parties)
                uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, name))

                # One timestamp so 'crt' and 'upt' agree on a new document.
                now = datetime.datetime.now()
                data = {
                    'secu': secu,
                    'y': trade_date,
                    'buy': parties[0],
                    'sale': parties[1],
                    'price': fields[1],
                    'disc': disc,
                    'ratio': ratio,
                    'stat': 2,
                    'volu': volu,
                    'amou': scale(fields[3]),
                    'c': {
                        'cd': curr,
                        'szh': '人民币' if curr == 'CNY' else '港币',
                        'en': curr,
                    },
                    'uuid': uid,
                    'crt': now,
                    'typ': 'szx_secu',
                    'upt': now,
                }

                # The lookup is skipped entirely when rp_flag is true.
                if rp_flag or not self._coll_in.get({'uuid': uid}):
                    self._coll_in.insert(data)

            print(u'权益类证券大宗交易 page: %d done!' % page)
    finally:
        self._coll_in.disconnect()
def main(self):
    """Crawl every configured page of bond block trades and store the rows.

    Pages ``1..self._crawl_pages`` are fetched by formatting the page number
    into ``self._query_string`` and appending it to ``self._base_url``.
    Every row returned by ``self.parse_web`` becomes one document written
    through ``self._coll_in``.

    A row whose date compares greater than ``self._latest_date`` is always
    inserted; any other row is inserted only when no document with the same
    ``uuid`` is found.
    """
    # NOTE(review): nothing in this method disconnects self._coll_in --
    # confirm that the owner of the collection closes it.
    last_page = self._crawl_pages
    for page_no in range(1, last_page + 1):
        page_url = self._base_url + self._query_string.format(page_no)

        for row in self.parse_web(page_url):
            # Row layout as used below:
            # 0 date, 1 code, 2 price, 3 volume, 4 buy side, 5 sale side.
            trade_date = row[0]

            # True  -> insert every row of this date;
            # False -> insert only rows that are not stored yet.
            insert_all = self._latest_date < trade_date

            secu, curr = secu_currency(row[1], typ_bond='bond')

            last_close = close_price(secu, trade_date, 'mysql')
            if last_close is not None:
                # NOTE(review): subtracts the close price and 1 from the
                # trade price; confirm this is the intended 'disc' formula.
                disc = '%.4f' % (float(row[2]) - float(last_close) - 1)
            else:
                disc = ''

            equity = total_equity(secu, trade_date, 'vary', 'stock')
            if equity is not None:
                ratio = '%.4f' % ((float(row[3]) / equity) * 100)
            else:
                ratio = ''

            # Deterministic id derived from every raw field of the row.
            uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, ''.join(row)))

            if curr == 'CNY':
                currency_name = '人民币'
            else:
                currency_name = '港币'

            document = {
                'secu': secu,
                'y': trade_date,
                'buy': row[4],
                'sale': row[5],
                'price': row[2],
                'disc': disc,
                'ratio': ratio,
                'stat': 2,
                'volu': '{0:.2f}'.format(float(row[3])),
                'amou': '{0:.2f}'.format(float(row[3]) * float(row[2])),
                'c': {'cd': curr, 'szh': currency_name, 'en': curr},
                'uuid': uid,
                'crt': datetime.datetime.now(),
                'typ': 'szx_bond',
                'upt': datetime.datetime.now(),
            }

            # The lookup is skipped entirely when insert_all is true.
            if insert_all or not self._coll_in.get({'uuid': uid}):
                self._coll_in.insert(document)

        print(u'债券大宗交易 page: %d done!' % page_no)
def insert(typ_key, secu_bond_datas):
    """Insert the rows stored under ``typ_key`` through ``query_string.coll_in``.

    Args:
        typ_key: ``'sf_data'`` (documents get typ ``'sha_secu'``) or
            ``'bond_data'`` (documents get typ ``'sha_bond'``).
        secu_bond_datas: mapping from those keys to a list of rows. An
            ``'sf_data'`` row is read as
            ``y, secu, price, amou, volu, buy, sale, ot``; a ``'bond_data'``
            row as ``y, secu, price, volu, ot``. The ``ot`` field is dropped.

    Raises:
        AssertionError: if ``typ_key`` is neither of the two accepted keys.

    A row is inserted only when no document with the same ``uuid`` is found;
    otherwise its uuid is printed. The collection is disconnected when the
    function finishes, and also when a row raises, so the connection is not
    left open.
    """
    assert typ_key == 'sf_data' or typ_key == 'bond_data'

    is_equity = typ_key == 'sf_data'
    coll_in = query_string.coll_in
    data_list = secu_bond_datas[typ_key]

    if is_equity:
        typ = 'sha_secu'
        keys = ['y', 'secu', 'price', 'amou', 'volu', 'buy', 'sale', 'ot']
    else:
        typ = 'sha_bond'
        keys = ['y', 'secu', 'price', 'volu', 'ot']

    print('Now will insert [{0}] data to mongo'.format(typ))

    try:
        for data in data_list:
            to_data = dict(zip(keys, data))
            to_data.pop('ot')

            if is_equity:
                to_data['amou'] = '{0:.2f}'.format(float(to_data['amou']) * 10000)
                to_data['volu'] = '{0:.2f}'.format(float(to_data['volu']) * 10000)
                secu, curr = secu_currency(to_data['secu'], 'stock', 'fund')
            else:
                # Bond rows carry no buy/sale sides and no amount field.
                to_data['buy'], to_data['sale'] = '', ''
                to_data['volu'] = '{0:.2f}'.format(
                    float(to_data['volu']) * 10000 * 10)
                # Uses the already rescaled volume assigned just above.
                to_data['amou'] = '{0:.2f}'.format(
                    float(to_data['price']) * float(to_data['volu']))
                secu, curr = secu_currency(to_data['secu'], typ_bond='bond')

            clo_price = close_price(secu, to_data['y'], 'mysql')
            if clo_price is None:
                disc = ''
            else:
                # NOTE(review): this subtracts the close price and 1 from the
                # trade price. If 'disc' is meant to be a ratio the usual
                # form would be price / close - 1 -- confirm before changing.
                disc = '%.4f' % (float(to_data['price']) - float(clo_price) - 1)

            total = total_equity(secu, to_data['y'], 'vary', 'stock')
            if total is None:
                ratio = ''
            else:
                ratio = '%.4f' % ((float(to_data['volu']) / total) * 100)

            # Deterministic id derived from every raw field of the row.
            uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, ''.join(data).encode('u8')))

            # One timestamp so 'crt' and 'upt' agree on a new document.
            now = datetime.now()
            to_data.update({
                'typ': typ,
                'disc': disc,
                'ratio': ratio,
                'stat': 2,
                'c': {
                    'cd': curr,
                    'szh': '人民币' if curr == 'CNY' else '港币',
                    'en': curr,
                },
                'uuid': uid,
                'crt': now,
                # Replaces the raw code from the row with the resolved value.
                'secu': secu,
                'upt': now,
            })

            if not coll_in.get({'uuid': uid}):
                coll_in.insert(to_data)
            else:
                # Single formatted string: same text as the Python 2
                # statement `print a, b`, and also valid on Python 3.
                print('%s uuid: %s' % (typ_key, uid))

        print('[{0}] data insert ok.'.format(typ))
    finally:
        coll_in.disconnect()
def main(self):
    """Crawl every configured page of equity block trades and store the rows.

    For each page number in ``1..self._crawl_pages`` the URL is built from
    ``self._base_url`` and ``self._query_string``, handed to
    ``self.parse_web`` and each parsed row is turned into a document that
    is written through ``self._coll_in``.

    A row whose date compares greater than ``self._latest_date`` is always
    inserted; any other row is inserted only when no document with the same
    ``uuid`` is found.

    ``self._coll_in`` is disconnected when the crawl ends, and also when a
    page raises part-way through, so the connection is not left open.
    """
    def scale(raw):
        # Strip the raw text, multiply by 10000 and render with two decimals.
        return '{0:.2f}'.format(float(raw.strip()) * 10000)

    pages = self._crawl_pages
    try:
        for page in range(1, pages + 1):
            url = self._base_url + self._query_string.format(page)
            # parse_web yields three sequences that are indexed in parallel.
            dt, code_price_volu_amou, buy_sale = self.parse_web(url)

            for i, trade_date in enumerate(dt):
                fields = code_price_volu_amou[i]  # code, price, volume, amount
                parties = buy_sale[i]             # buy side, sale side

                # Note: this crawler covers securities (stock and fund).
                # True  -> insert every row of this date;
                # False -> insert only rows that are not stored yet.
                rp_flag = self._latest_date < trade_date

                secu, curr = secu_currency(fields[0], 'stock', 'fund')

                clo_price = close_price(secu, trade_date, 'mysql')
                if clo_price is None:
                    disc = ''
                else:
                    # NOTE(review): this subtracts the close price and 1 from
                    # the trade price. If 'disc' is meant to be a ratio the
                    # usual form would be price / close - 1 -- confirm before
                    # changing stored values.
                    disc = '%.4f' % (float(fields[1]) - float(clo_price) - 1)

                total = total_equity(secu, trade_date, 'vary', 'stock')
                volu = float(scale(fields[2]))
                ratio = '' if total is None else ('%.4f' % ((volu / total) * 100))

                # Deterministic id derived from every raw field of the row.
                name = ''.join([trade_date] + fields + parties)
                uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, name))

                # One timestamp so 'crt' and 'upt' agree on a new document.
                now = datetime.datetime.now()
                data = {
                    'secu': secu,
                    'y': trade_date,
                    'buy': parties[0],
                    'sale': parties[1],
                    'price': fields[1],
                    'disc': disc,
                    'ratio': ratio,
                    'stat': 2,
                    'volu': volu,
                    'amou': scale(fields[3]),
                    'c': {
                        'cd': curr,
                        'szh': '人民币' if curr == 'CNY' else '港币',
                        'en': curr,
                    },
                    'uuid': uid,
                    'crt': now,
                    'typ': 'szx_secu',
                    'upt': now,
                }

                # The lookup is skipped entirely when rp_flag is true.
                if rp_flag or not self._coll_in.get({'uuid': uid}):
                    self._coll_in.insert(data)

            print(u'权益类证券大宗交易 page: %d done!' % page)
    finally:
        self._coll_in.disconnect()