def updateAdjusted(row, client, table):
    """Rebuild the adjusted-close records of one ticker and upsert them.

    Args:
        row: Mapping providing the 'Ticker', 'IssueDate' and 'UpdateDate'
            keys (presumably a StockList document -- confirm with the caller).
        client: Passed through unchanged to ``getDataWithAdjClose``.
        table: Handed to ``update_one`` together with every record.

    Returns:
        None. The outcome is reported through ``Tele().sendMessage``.

    Any ``Exception`` raised while fetching or transforming the data is
    printed and reported as a failure. Exceptions raised while writing the
    records are not caught here and propagate to the caller.
    """
    ticker = row['Ticker'].strip()
    start_date = row['IssueDate']
    end = row['UpdateDate']
    try:
        data = getDataWithAdjClose(client, ticker, 'TWSE', start_date, end)
        data['Ticker'] = ticker
        data.index.name = 'Date'
        data = data.reset_index()
        data.rename(columns={
            '_id': 'idx',
            'Adj Close': 'Adj_Close'
        }, inplace=True)
        datas = [dict(x._asdict()) for x in data.itertuples()]
        # Transform every record concurrently; results keep submission order.
        with ThreadPoolExecutor(20) as executor:
            exes = [executor.submit(update_data, d) for d in datas]
            datas = [exe.result() for exe in exes]
    except Exception as e:
        print(e)
    except KeyboardInterrupt:
        # Hard exit so a Ctrl-C is not held up by outstanding pool workers.
        os._exit(0)
    else:
        with ThreadPoolExecutor(20) as executor:
            pending = [
                executor.submit(update_one, table, record) for record in datas
            ]
            # Wait for every write and surface the first failure, if any.
            for future in pending:
                future.result()
        # Instantiate Tele before sending, as every other call site does.
        Tele().sendMessage(f'update Adj Close of {ticker} Success')
        return
    Tele().sendMessage(f'update Adj Close of {ticker} Failed')
if not os.path.isdir(path):
    # Fall back to the working directory when the computed path is unusable.
    path = os.getcwd()


def update_data_dict(d):
    """Drop the 'Index' key and copy 'Close' into 'Adj Close'.

    The dict is modified in place and the same object is returned.
    Raises KeyError when 'Index' or 'Close' is missing.
    """
    del d['Index']
    d['Adj Close'] = d['Close']
    return d


def update_one(table, d):
    """Upsert ``d`` into ``table``, using ``d`` itself as the filter.

    Sleeps one millisecond after the write.
    """
    table.update_one(d, {'$set': d}, upsert=True)
    time.sleep(0.001)


if __name__ == '__main__':
    client = Mongo()
    table = client['admin']['TWSE']['historicalPrice']
    table.create_index([('Date', 1), ('Ticker', 1)], unique=True)

    df = read_csv(os.path.join(path, 'TWSE_HistoricalPrice.txt'), sep='\t')
    data = [dict(x._asdict()) for x in df.itertuples()]

    start_time = time.time()
    # A plain comprehension is enough: the transformation is a trivial
    # in-memory mutation, so a thread pool only adds scheduling overhead.
    data = [update_data_dict(d) for d in data]
    # insert_many rejects an empty document list, so skip it for an empty file.
    if data:
        table.insert_many(data)
    duration = round((time.time() - start_time) / 60, 4)

    Tele().sendMessage(f'insert {len(data)} data use {duration} mins',
                       group='UpdateMessage')
afterhour_table.insert_many(AfterHour_Odds) except: time.sleep(10) time.sleep(5) else: #null db or haven't up to date dates = date_range(afterhour_start, datetime.today()) print('First time for create DB or haven\'t up to date') for date in dates: print(f'crawl {date}') try: # crawl Intraday and insert to mongo AfterHour_Odds = OddLot_AfterHour(date) if AfterHour_Odds is not None: afterhour_table.insert_many(AfterHour_Odds) except: time.sleep(10) try: # crawl AfterHour and insert to mongo if date >= intraday_start: Intraday_Odds = OddLot_Intraday(date) if Intraday_Odds is not None: intraday_table.insert_many(Intraday_Odds) except: time.sleep(10) time.sleep(5) # send finish message Tele().sendMessage('Update TWSE Odd Lot Success.', group='UpdateMessage')
end_date = datetime.today() dates = date_range(start_date, end_date) full_Kbar = [] for date in dates: try: print(f'crawl {date}') full_dict, kBar = crawl5SecIndex(date) except Exception as e: print(f'{date} has no data') time.sleep(10) continue else: try: Intraday_table.insert_many(full_dict) except: pass try: Interday_table.insert_many(kBar) except: pass time.sleep(5) duration = time.time() - start_time except Exception as e: Tele().sendMessage('Update Intraday Index Data Failed', group='UpdateMessage') print(e) else: Tele().sendMessage( f'Update Intraday Index Data Success, cost {round(duration, 2)} seconds.', group='UpdateMessage')
if os.path.isfile( os.path.join(otcpath, f"{date.strftime('%Y-%m-%d')}.txt")): df_otc = read_csv(os.path.join( otcpath, f"{date.strftime('%Y-%m-%d')}.txt"), sep='\t') data_otc = [ update_data_dict(x) for x in list(df_otc.T.to_dict().values()) ] else: data_otc = OTC_HistoricalPrice(date) if len(data_otc) > 0: DataFrame(data_otc).to_csv(os.path.join( otcpath, f"{date.strftime('%Y-%m-%d')}.txt"), sep='\t', index=None, float_format='%g') table.insert_many(data_otc) print(f'Update {date} Historical Price of otc success') except Exception as e: print('otc', e) time.sleep(5) # send finish message # Line().sendMessage('Update Stock Historical Price success') Tele().sendMessage('Update Stock Historical Price success', group='UpdateMessage') except Exception as e: print(e)
import os
import time

import pandas as pd

from modules import Tele, Mongo

# Directory holding this script; fall back to the working directory if the
# computed location is not a directory.
path = os.path.dirname(os.path.abspath(__file__))
if not os.path.isdir(path):
    path = os.getcwd()

__updated__ = '2021-01-31 04:21:20'

if __name__ == '__main__':
    client = Mongo()
    # NOTE(review): pandas' ``chunksize`` is a row count, not a byte budget;
    # 15 * 1024 * 1024 reads like a size in bytes -- confirm the intent.
    _batch_size = 15 * 1024 * 1024
    table = client['admin']['USSE']['historicalPrice']
    table.create_index([('Date', 1), ('Ticker', 1)], unique=True)

    start_time = time.time()
    source_file = os.path.join(path, 'USSE_HistoricalPrice.txt')
    reader = pd.read_csv(source_file, sep='\t', chunksize=_batch_size)

    total_data = 0
    for chunk in reader:
        records = []
        for row in chunk.itertuples():
            record = dict(row._asdict())
            # itertuples() prepends the frame index; it is not stored.
            record.pop('Index')
            records.append(record)
        table.insert_many(records)
        total_data += len(records)

    duration = round((time.time() - start_time) / 60, 4)
    Tele().sendMessage(
        f'insert {total_data} USSE Historical data use {duration} mins',
        group='UpdateMessage')
# Load every zipped options archive found in optPath, oldest name first.
Options_Files = sorted(listdir(optPath))
for Name in Options_Files:
    if '.zip' not in Name:
        continue
    print('Option', Name)
    # Try the parsers in order and fall back to the next one when a parser
    # raises. Only Exception is caught, so Ctrl-C / SystemExit still stop the
    # run instead of being treated as a parse failure.
    for parse in (Options, Options_Before2015, Options_Before2010):
        try:
            df = parse(Name)
        except Exception:
            continue
        break
    else:
        # Last resort; an error raised here propagates.
        df = Options_multiFiles(Name)
    parallel_update(opt_table, createTickerOpt, df)
    time.sleep(1)

# Daily futures update, starting from the newest date already stored.
fut_lastDate = max(
    parseDatetime(x) for x in fut_table.distinct('Date') if x is not None)
df = Futures_Data_Daily(fut_lastDate)
parallel_update(fut_table, createBrokerID, df)

# Daily options update, starting from the newest date already stored.
opt_lastDate = max(
    parseDatetime(x) for x in opt_table.distinct('Date') if x is not None)
df = Options_Data_Daily(opt_lastDate)
parallel_update(opt_table, createTickerOpt, df)

# Send the completion message.
Tele().sendMessage(f'盤後爬取期交所,所有期貨收盤價成功', group='UpdateMessage')
if __name__ == '__main__':
    # Connect to Mongo
    client = Mongo()

    # Historical price table; keep its tickers in a set so the membership
    # test below is O(1) per stock instead of a list scan.
    table = client['admin']['TWSE']['historicalPrice']
    uniqueStock = set(table.distinct('Ticker'))

    # Stock list table; only rows carrying the newest UpdateDate are used.
    tickers_table = client['admin']['TWSE']['StockList']
    updateDate = max(tickers_table.distinct('UpdateDate'))
    tickers = list(tickers_table.find({'UpdateDate': {'$eq': updateDate}}))

    # Run updateAdjusted for every listed stock that has price history.
    start_time = time.time()
    with ThreadPoolExecutor(20) as executor:
        exes = [
            executor.submit(updateAdjusted, row, client, table)
            for row in tickers
            if row['Ticker'] in uniqueStock
        ]
        # Waiting on result() re-raises any error from a worker.
        finished_process = [exe.result() for exe in exes]
    duration = round((time.time() - start_time) / 3600, 4)

    Tele().sendMessage(
        f'update {len(finished_process)} assets, used {duration} hours',
        group='UpdateMessage')
if __name__ == '__main__':
    start_time = time()

    # Connect to Mongo and make sure the (Ticker, Name) unique index exists.
    client = Mongo()
    table = client['admin']['TWSE']['StockList']
    table.create_index([('Ticker', 1), ('Name', 1)], unique=True)

    # Snapshot of the documents already stored.
    ExistsStocks = pd.DataFrame(list(table.find()))

    # Request both stock lists; mode 2 = listed market, mode 4 = OTC market.
    fetched = requestStockList(2) + requestStockList(4)
    # updateOutput receives one (entry, existing-stocks frame) pair per entry.
    output = [updateOutput((entry, ExistsStocks)) for entry in fetched]

    # Upsert each document: match on '_id' when it has one, otherwise match
    # on the whole document.
    for doc in output:
        selector = {'_id': doc['_id']} if '_id' in doc else doc
        table.update_one(selector, {'$set': doc}, upsert=True)

    duration = round((time() - start_time) / 60, 2)

    # Send the completion message.
    Tele().sendMessage(f'Update Stock List success use {duration} mins',
                       group='UpdateMessage')
return Importdata if __name__ == "__main__": client = Mongo() table = client['admin']['TWSE']['Actions'] start_date = datetime.strptime(table.distinct('Date')[-1], '%Y-%m-%d') + timedelta(days=1) date_range = date_range(start_date, datetime.today()) for date in date_range: ### Date ### Importdata = Main_Crawler(date) if Importdata is not None: if not Importdata.empty: Importdata.to_csv(os.path.join( actionpath, f'Daily_{date.strftime("%Y-%m-%d")}.txt'), sep='\t', index=None) data = [ dict(x._asdict().items()) for x in Importdata.itertuples() ] for x in data: del x['Index'] table.insert_many(data) time.sleep(5) # send finish message Tele().sendMessage('Update TWSE Actions success', group='UpdateMessage')
# Disabled per-date notification left at the end of the preceding function:
# Tele().sendMessage(f'爬取 {date.strftime("%Y-%m-%d")} 上市類股外資持股比例成功')


def crawl_otc(schema):
    """Crawl the OTC data day by day and store it in ``schema``.

    Crawling starts on 2007-04-23 when the collection is missing or empty,
    otherwise on the day after the newest stored 'Date'. Every date up to
    today is passed to ``crawl_foriegn_holding_ratio_otc`` and any non-empty
    result is inserted.

    Args:
        schema: Database-like object offering ``list_collection_names()`` and
            item access to collections.
    """
    table_name = 'TWSE.ForeignInvestment.Industry.OTC'
    existing = schema.list_collection_names()
    table = schema[table_name]

    # Default start; only replaced when stored documents already exist.
    start = datetime(2007, 4, 23)
    if table_name not in existing:
        table.create_index([('Date', 1), ('Industry', 1)])
    elif table.count_documents({}) != 0:
        newest = sorted(table.distinct('Date'))[-1]
        start = datetime.strptime(newest, "%Y-%m-%d") + timedelta(1)

    for date in date_range(start, datetime.today()):
        full_data = crawl_foriegn_holding_ratio_otc(date)
        if not full_data:
            continue
        table.insert_many(full_data)
        # Disabled per-date notification:
        # Tele().sendMessage(f'爬取 {date.strftime("%Y-%m-%d")} 上櫃類股外資持股比例成功')


if __name__ == '__main__':
    client = Mongo()
    schema = client['admin']
    # crawl_listed(schema)
    crawl_otc(schema)
    Tele().sendMessage(f'爬取外資持股比例成功', group='UpdateMessage')