def crawler_callableBull(coll, table, firstday) -> Generator:
    """Crawl raw callable-bull ('0999C') daily JSON dicts into MongoDB.

    Args:
        coll: MongoDB collection the raw dicts are inserted into.
        table: SQLite table name used to find the last crawled datetime.
        firstday: earliest trading day to consider crawling.

    Returns:
        The generator produced by ``crawler.looper`` over the missing dates.
    """
    def gen_url_giventype(input_date: str) -> str:
        return gen_url('0999C', input_date)

    # gen_url_giventype is a local function, so the module-level get_dict
    # cannot see it — a local get_dict is defined on purpose.
    def get_dict(date: str) -> dict:
        return cytoolz.compose(jsonLoadsF, get_plain_text,
                               gen_url_giventype)(date)

    def craw(date: str) -> dict:
        return get_dict(date)

    def save(d: dict) -> None:
        print(coll.insert_one(d).inserted_id)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is None`, not `== None`.
    # Only dates not already present in the collection are crawled.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is None
    ]
    print('dates', dates)
    return crawler.looper(craw_save, dates)
def crawler_composite(table: str) -> Generator:
    """Crawl the daily market-composite table ('data3'/'fields3') into SQLite.

    Args:
        table: SQLite table name to write into (also used to locate the
            last crawled datetime).

    Returns:
        The generator produced by ``crawler.looper`` over the missing dates.
    """
    def craw(trade_date: str) -> pd.DataFrame:
        raw = get_dict(trade_date)
        if 'stat' in raw and raw['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        # Reformat the crawled YYYYMMDD stamp into an ISO-style date string.
        iso_date = f"{raw['date'][0:4]}-{raw['date'][4:6]}-{raw['date'][6:]}"
        frame = pd.DataFrame(raw['data3'], columns=raw['fields3'])
        frame = frame.replace(',', '', regex=True).replace('--', np.nan)
        frame.insert(0, '年月日', iso_date)
        frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)
        # Coerce the numeric columns to float via the project helper.
        return ast.to_float(['成交金額(元)', '成交股數(股)', '成交筆數'], frame)

    def save(frame: pd.DataFrame) -> None:
        saver.lite(table, frame)

    def craw_save(trade_date: str) -> None:
        crawler.craw_save(save, craw, trade_date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2004, 2, 11)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)
    return crawler.looper(craw_save, nPeriods)
def crawler_extendedCallableBear(table: str) -> Generator: gen_url_giventype = partial(gen_url, '0999X') # gen_url_giventype is local func, can not be used by global get_dict, so make sure to def get_dict locally def get_dict(date: str) -> dict: return cytoolz.compose(jsonLoadsF, get_plain_text, gen_url_giventype)(date) def craw(date: str) -> pd.DataFrame: d = get_dict(date) if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!': raise crawler.NoData('很抱歉,沒有符合條件的資料!') data = d['data1'] fields = d['fields1'] date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:] df = pd.DataFrame(data, columns=fields).replace(',', '', regex=True).replace( '--', np.nan) df.insert(0, '年月日', date) df['年月日'] = pd.to_datetime(df['年月日']).astype(str) df['漲跌(+/-)'] = df['漲跌(+/-)'].replace( "<p style= color:red>+</p>", 1).replace("<p style= color:green>-</p>", -1).replace('X', np.nan).replace(' ', 0) df['牛熊證觸及限制價格'] = df['牛熊證觸及限制價格'].replace('', 0).replace('*', 1).replace( '*', 1).fillna(np.nan) df['本益比'] = df['本益比'].replace('', np.nan).fillna(np.nan) intColumns = ['成交股數', '成交筆數', '最後揭示買量', '最後揭示賣量'] floatColumns = [ '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌(+/-)', '漲跌價差', '最後揭示買價', '最後揭示賣價', '本益比', '牛熊證觸及限制價格', '標的證券收盤價/指數' ] floatColumns = [col for col in floatColumns if col in list(df)] df[intColumns + floatColumns] = df[intColumns + floatColumns].replace( '', 0).fillna(np.nan) df = ast.to_int(intColumns, df) df = ast.to_float(floatColumns, df) return df def save(df: pd.DataFrame) -> None: saver.lite(table, df) def craw_save(date: str) -> None: crawler.craw_save(save, craw, date) lastdate = crawler.dt_to_str([saver.last_datetime(table)]) firstday = dt.datetime(2014, 7, 31) days_db = days_lite(table) nPeriods = lastdate + crawler.dt_to_str( adjust.days_trade(firstday) - days_db) # lastdate = saver.last_datetime(table) # nPeriods = crawler.input_dates(lastdate, dt.datetime.now()) return crawler.looper(craw_save, nPeriods)
def mgo_hugeDeal() -> None:
    """Replay stored block-trade docs from MongoDB into the daily pipeline.

    For every trading day since ``firstday`` whose raw doc exists in the
    'tse' collection, forwards its fields/data to ``daily.hugeDeal``.
    """
    table = '鉅額交易日成交資訊'
    coll = client['tse'][table]
    firstday = dt.datetime(2005, 4, 4)
    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is not None`, not `!= None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        daily.hugeDeal(date, doc['fields'], doc['data'])
def mgo_extendedCallableBear() -> None:
    """Replay stored extended-callable-bear docs into the daily pipeline.

    Forwards the 'fields1'/'data1' section of each existing MongoDB doc to
    ``daily.extendedCallableBear``.
    """
    table = '可展延牛證'
    coll = db[table]
    firstday = dt.datetime(2014, 7, 31)
    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is not None`, not `!= None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        daily.extendedCallableBear(date, doc['fields1'], doc['data1'])
def mgo_callableBear() -> None:
    """Replay stored callable-bear docs into the daily pipeline.

    Forwards the 'fields1'/'data1' section of each existing MongoDB doc to
    ``daily.callableBear``.
    """
    table = '熊證(不含可展延熊證)'
    coll = client['tse'][table]
    firstday = dt.datetime(2011, 7, 8)
    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is not None`, not `!= None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        daily.callableBear(date, doc['fields1'], doc['data1'])
def mgo_composite(coll) -> None:
    """Replay stored market-composite sections into the daily pipeline.

    The raw docs carry up to five 'fieldsN'/'dataN' sections; the composite
    one is recognized by its exact header row before being forwarded to
    ``daily.composite``.

    Args:
        coll: MongoDB collection holding the raw daily docs.
    """
    firstday = dt.datetime(2004, 2, 11)
    lastdate = crawler.dt_to_str([saver.last_datetime('大盤成交統計')])
    days_db = days_lite('大盤成交統計')
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is not None`, not `!= None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        for i in range(1, 6):
            field = f'fields{i}'
            data = f'data{i}'
            if field in doc:
                # Identify the composite section by its header row.
                if doc[field] == ['成交統計', '成交金額(元)', '成交股數(股)', '成交筆數']:
                    print(date, '大盤成交統計')
                    daily.composite(date, doc[field], doc[data])
def crawler_upsAndDown(table: str) -> Generator:
    """Crawl the ups-and-downs summary ('data4'/'fields4') into SQLite.

    The first two raw rows carry cells shaped like 'value(paren-value)';
    each is split into two DataFrame rows — the value outside and the value
    inside the parentheses.
    """
    def craw(date: str) -> pd.DataFrame:
        d = get_dict(date)
        if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        data = d['data4']
        fields = d['fields4']
        # Reformat YYYYMMDD into YYYY-MM-DD.
        date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:]
        # Fix: removed a dead `data[0][1].split('(')[0]` statement whose
        # result was discarded, and folded the duplicated split logic for
        # rows 0 and 1 into a single loop (was copy-pasted with `l`/`L`).
        rows = []
        for raw_row in (data[0], data[1]):
            rows.append([cell.split('(')[0] for cell in raw_row])
            rows.append([cell.split('(')[1].replace(')', '') for cell in raw_row])
        rows.extend([data[2], data[3], data[4]])
        df = pd.DataFrame(rows, columns=fields).replace(',', '', regex=True).replace(
            '--', np.nan)
        df.insert(0, '年月日', date)
        df['年月日'] = pd.to_datetime(df['年月日']).astype(str)
        intColumns = ['整體市場', '股票']
        df = ast.to_int(intColumns, df)
        return df

    def save(df: pd.DataFrame) -> None:
        saver.lite(table, df)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2011, 8, 1)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)
    return crawler.looper(craw_save, nPeriods)
def mgo_market(coll) -> None:
    """Replay stored market-index sections into the daily pipeline.

    Scans each raw doc's 'fieldsN'/'dataN' sections and forwards the one
    whose header matches the market-statistics layout to ``daily.market``.

    Args:
        coll: MongoDB collection holding the raw daily docs.
    """
    firstday = dt.datetime(2009, 1, 5)
    lastdate = crawler.dt_to_str([saver.last_datetime('大盤統計資訊')])
    days_db = days_lite('大盤統計資訊')
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is not None`, not `!= None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        for i in range(1, 6):
            field = f'fields{i}'
            data = f'data{i}'
            if field in doc:
                # Identify the market-statistics section by its header row.
                if doc[field] == ['指數', '收盤指數', '漲跌(+/-)', '漲跌點數', '漲跌百分比(%)']:
                    print(date, '大盤統計資訊')
                    daily.market(date, doc[field], doc[data])
def mgo_upsAndDown(coll) -> None:
    """Replay stored ups-and-downs sections into the daily pipeline.

    Scans each raw doc's 'fieldsN'/'dataN' sections and forwards the one
    whose header matches the ups-and-downs layout to ``daily.upsAndDown``.

    Args:
        coll: MongoDB collection holding the raw daily docs.
    """
    firstday = dt.datetime(2011, 8, 1)
    lastdate = crawler.dt_to_str([saver.last_datetime('漲跌證券數合計')])
    days_db = days_lite('漲跌證券數合計')
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is not None`, not `!= None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        for i in range(1, 6):
            field = f'fields{i}'
            data = f'data{i}'
            if field in doc:
                # Identify the ups-and-downs section by its header row.
                if doc[field] == ['類型', '整體市場', '股票']:
                    print(date, '漲跌證券數合計')
                    daily.upsAndDown(date, doc[field], doc[data])
def crawler_close(coll, table, firstday) -> Generator:
    """Crawl raw daily-close JSON dicts into MongoDB ``coll``.

    NOTE(review): a second `crawler_close(table)` is defined later in this
    module; the later definition shadows this one at import time — confirm
    which is intended.

    Args:
        coll: MongoDB collection to insert raw dicts into.
        table: SQLite table name used to find the last crawled datetime.
        firstday: earliest trading day to consider crawling.

    Returns:
        The generator produced by ``crawler.looper`` over the missing dates.
    """
    def craw(date: str) -> dict:
        return get_dict(date)

    def save(d: dict) -> None:
        print(coll.insert_one(d).inserted_id)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is None`, not `== None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is None
    ]
    print('dates to craw:', dates)
    return crawler.looper(craw_save, dates)
def mgo_close(coll) -> None:
    """Replay stored daily-close sections into the daily pipeline.

    Scans each raw doc's 'fieldsN'/'dataN' sections (up to nine) and
    forwards the one whose header matches the full closing-quote layout to
    ``daily.close``.

    Args:
        coll: MongoDB collection holding the raw daily docs.
    """
    firstday = dt.datetime(2004, 2, 11)
    lastdate = crawler.dt_to_str([saver.last_datetime('每日收盤行情(全部(不含權證、牛熊證))')])
    days_db = days_lite('每日收盤行情(全部(不含權證、牛熊證))')
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    # Fix: PEP 8 — compare against None with `is not None`, not `!= None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is not None
    ]
    for date in dates:
        doc = coll.find_one({"date": date})
        for i in range(1, 10):
            field = f'fields{i}'
            data = f'data{i}'
            if field in doc:
                # Identify the closing-quote section by its header row.
                if doc[field] == [
                        '證券代號', '證券名稱', '成交股數', '成交筆數', '成交金額',
                        '開盤價', '最高價', '最低價', '收盤價', '漲跌(+/-)', '漲跌價差',
                        '最後揭示買價', '最後揭示買量', '最後揭示賣價', '最後揭示賣量', '本益比'
                ]:
                    print(date, '每日收盤行情(全部(不含權證、牛熊證))')
                    daily.close(date, doc[field], doc[data])
def crawler_marketReturn(table: str) -> Generator:
    """Crawl the daily return-index table ('data2'/'fields2') into SQLite.

    Args:
        table: SQLite table name to write into (also used to locate the
            last crawled datetime).

    Returns:
        The generator produced by ``crawler.looper`` over the missing dates.
    """
    def craw(trade_date: str) -> pd.DataFrame:
        raw = get_dict(trade_date)
        if 'stat' in raw and raw['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        iso_date = f"{raw['date'][0:4]}-{raw['date'][4:6]}-{raw['date'][6:]}"
        frame = pd.DataFrame(raw['data2'], columns=raw['fields2'])
        frame = frame.replace(',', '', regex=True).replace('--', np.nan)
        # The up/down flag arrives as inline HTML; map it onto +1 / -1 / 0.
        frame['漲跌(+/-)'] = (
            frame['漲跌(+/-)']
            .replace("<p style ='color:red'>+</p>", 1)
            .replace("<p style ='color:green'>-</p>", -1)
            .replace('X', 0)
            .replace(' ', 0))
        frame.insert(0, '年月日', iso_date)
        frame = frame.rename(columns={'報酬指數': '指數'})
        frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)
        # Coerce the numeric columns to float via the project helper.
        return ast.to_float(['收盤指數', '漲跌(+/-)', '漲跌點數', '漲跌百分比(%)'], frame)

    def save(frame: pd.DataFrame) -> None:
        saver.lite(table, frame)

    def craw_save(trade_date: str) -> None:
        crawler.craw_save(save, craw, trade_date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2009, 1, 5)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)
    return crawler.looper(craw_save, nPeriods)
def crawler_close(table: str) -> Generator:
    """Crawl daily closing quotes ('data5'/'fields5') into SQLite ``table``.

    NOTE(review): another `crawler_close(coll, table, firstday)` exists
    earlier in this module; this later definition shadows it — confirm which
    is intended.
    """
    def craw(trade_date: str) -> pd.DataFrame:
        raw = get_dict(trade_date)
        if 'stat' in raw and raw['stat'] == '很抱歉,沒有符合條件的資料!':
            raise crawler.NoData('很抱歉,沒有符合條件的資料!')
        iso_date = f"{raw['date'][0:4]}-{raw['date'][4:6]}-{raw['date'][6:]}"
        frame = pd.DataFrame(raw['data5'], columns=raw['fields5'])
        frame = (frame.replace(',', '', regex=True)
                      .replace('--', np.nan)
                      .replace('', np.nan))
        # The up/down flag arrives as inline HTML; map it onto +1 / -1 / 0.
        frame['漲跌(+/-)'] = (
            frame['漲跌(+/-)']
            .replace('<p style= color:red>+</p>', 1)
            .replace('<p style= color:green>-</p>', -1)
            .replace('X', 0)
            .replace(' ', 0))
        frame.insert(0, '年月日', iso_date)
        frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)
        numeric_cols = [
            '成交股數', '成交筆數', '成交金額', '開盤價', '最高價', '最低價', '收盤價',
            '漲跌(+/-)', '漲跌價差', '最後揭示買價', '最後揭示買量', '最後揭示賣價',
            '最後揭示賣量', '本益比'
        ]
        return ast.to_float(numeric_cols, frame)

    def save(frame: pd.DataFrame) -> None:
        saver.lite(table, frame)

    def craw_save(trade_date: str) -> None:
        crawler.craw_save(save, craw, trade_date)

    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    firstday = dt.datetime(2004, 2, 11)
    days_db = days_lite(table)
    nPeriods = lastdate + crawler.dt_to_str(
        adjust.days_trade(firstday) - days_db)
    return crawler.looper(craw_save, nPeriods)
def craw_hugeDeal(coll) -> Generator:
    """Crawl raw block-trade JSON dicts into MongoDB ``coll``.

    Args:
        coll: MongoDB collection the raw dicts are inserted into.

    Returns:
        The generator produced by ``crawler.looper`` over the missing dates.
    """
    table = '鉅額交易日成交資訊'

    def craw(date: str) -> dict:
        return get_dict(date)

    def save(d: dict) -> None:
        print(coll.insert_one(d).inserted_id)

    def craw_save(date: str) -> None:
        crawler.craw_save(save, craw, date)

    firstday = dt.datetime(2005, 4, 4)
    lastdate = crawler.dt_to_str([saver.last_datetime(table)])
    days_db = days_lite(table)
    nPeriods = lastdate + \
        crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
    print('nPeriods', nPeriods)
    # Fix: PEP 8 — compare against None with `is None`, not `== None`.
    dates = [
        t.replace('-', '') for t in nPeriods
        if coll.find_one({"date": t}) is None
    ]
    print('dates', dates)
    return crawler.looper(craw_save, dates)
def gen_url_giventype(input_date: str) -> str: return gen_url('S', input_date) ###----鉅額交易日成交資訊---- #!!! not everyday day has huge deal, most of day there are no data # -- 1 company in 1 day may have more than 1 transaction -- def addNumberF(df): df.第幾筆 = list(range(1,len(df.第幾筆)+1)) return df lastdate = saver.last_datetime('鉅額交易日成交資訊') empty = [] def craw_hugeDeal(date: str) -> pd.DataFrame: global empty d = get_dict(date) if 'stat' in d and d['stat'] == '很抱歉,沒有符合條件的資料!': raise crawler.NoData('很抱歉,沒有符合條件的資料!') data = d['data'] fields = d['fields'] date = d['date'][0:4] + '-' + d['date'][4:6] + '-' + d['date'][6:] if data== []: empty = empty + [date] df = pd.DataFrame(data, columns=fields).replace(',', '', regex=True).replace('--', np.nan).replace('', np.nan)
# NOTE(review): this chunk begins mid-function — the enclosing `def` (the
# margin/short-sale crawler, presumably the `craw_margin` referenced below)
# is outside the visible source. Indentation reconstructed accordingly.
    floatColumns = ['融券賣出成交金額', '借券賣出成交金額']
    df[floatColumns] = df[floatColumns].astype(float)
    intColumns = ['融券賣出成交數量', '借券賣出成交數量']
    df[intColumns] = df[intColumns].astype(int)
    return df


def save(df: pd.DataFrame) -> None:
    # Persist the crawled frame into the SQLite table.
    saver.lite('當日融券賣出與借券賣出成交量值(元)', df)


def craw_save(date: str) -> None:
    # Crawl one date with craw_margin and save it.
    crawler.craw_save(save, craw_margin, date)


table = '當日融券賣出與借券賣出成交量值(元)'
lastdate = crawler.dt_to_str([saver.last_datetime(table)])
# First day this data series exists on the exchange.
firstday = dt.datetime(2008, 9, 26)
days_db = days_lite(table)
nPeriods = lastdate + crawler.dt_to_str(adjust.days_trade(firstday) - days_db)
# nPeriods = crawler.input_dates(lastdate, dt.datetime.now())
# Drain the crawl generator so every date is actually processed.
generatorG = crawler.looper(craw_save, nPeriods)
for _ in generatorG:
    pass
# crawler.loop(craw_save, nPeriods)
s.close()