class ClientTest(unittest.TestCase):
    """Smoke tests for Client requests and the two HTML parsers.

    Each test only exercises the call path; none of them asserts on the
    returned data.
    """

    # Setup
    def setUp(self):
        """Configure INFO-level logging and create a fresh Client."""
        logging.basicConfig(level=logging.INFO)
        self.cli = Client()

    # Teardown
    def tearDown(self):
        """Close the connection held by the Client created in setUp."""
        self.cli.closeConnection()

    # Test t146sb10
    def test_request_t146sb10(self):
        """Call requestServer for "t146sb10" with a sample form body."""
        logging.info("ClientTest.test_request_t146sb10")
        payload = "encodeURIComponent=1&step=2&TYPEK=pub&co_id_1=&SDATE=20150101&EDATE=20151130&YEAR1=104&YEAR2=104&MONTH1=1&MONTH2=104&SDAY=1&EDAY=30&scope=2&sort=1&rpt=bool_t67sb07&firstin=1"
        response = self.cli.requestServer("t146sb10", payload)

    # Test t67sb03
    def test_request_t67sb03(self):
        """Call requestServer for "t67sb03" with a sample form body."""
        logging.info("ClientTest.test_request_t67sb03")
        payload = "encodeURIComponent=1&step=2&TYPEK=pub&co_id=5846&DATE1=20150105&SKEY=1&firstin=1"
        response = self.cli.requestServer("t67sb03", payload)

    # Test parser1
    def test_html_parser1(self):
        """Feed a "t146sb10" response into MopsHtmlParser_1."""
        logging.info("ClientTest.test_html_parser1")
        # The parser is constructed before the request is issued.
        first_parser = MopsHtmlParser_1(convert_charrefs=True)
        payload = "encodeURIComponent=1&step=2&TYPEK=pub&co_id_1=&SDATE=20150101&EDATE=20151231&YEAR1=104&YEAR2=104&MONTH1=1&MONTH2=104&SDAY=1&EDAY=31&scope=2&sort=1&rpt=bool_t67sb07&firstin=1"
        page = self.cli.requestServer("t146sb10", payload)
        first_parser.feed(page)

    # Test parser2
    def test_html_parser2(self):
        """Feed a "t67sb03" response into MopsHtmlParser_2."""
        logging.info("ClientTest.test_html_parser2")
        # The parser is constructed before the request is issued.
        second_parser = MopsHtmlParser_2(convert_charrefs=True)
        payload = "encodeURIComponent=1&co_id=5846&TYPEK=pub&DATE1=20150105&SKEY=5&step=2&firstin=1"
        page = self.cli.requestServer("t67sb03", payload)
        second_parser.feed(page)
def __init__(self):
    """Start with no date range, a new Client, zero progress and no observers."""
    # Both ends of the date range are unset until they are assigned later.
    self.fromDate = self.toDate = None
    client = Client()
    self.cli = client
    self.progress = 0
    self.progressObserver = []
class Processor:
    """Fetch report pages for a date range, parse them and export the rows.

    Typical use: call setDateRange(), optionally registerProgressObserver(),
    then runProcess().
    """

    # Constructor
    def __init__(self):
        self.fromDate = None  # start of the range (datetime) or None if unset
        self.toDate = None  # end of the range (datetime) or None if unset
        self.cli = Client()
        self.progress = 0  # integer percentage updated by runProcess()
        self.progressObserver = []  # objects notified through updateProgress()

    # Set and validate the date range
    def setDateRange(self, fromDate, toDate):
        """Parse two ``yyyymmdd`` strings and store them as the date range.

        The stored range is only replaced when BOTH strings parse, so a
        malformed ``toDate`` no longer leaves a half-updated range behind.

        Returns:
            True when both dates were accepted, False on a format error.
        """
        try:
            parsedFrom = datetime.strptime(fromDate, "%Y%m%d")
            parsedTo = datetime.strptime(toDate, "%Y%m%d")
        except ValueError:
            print("日期格式錯誤,正確是:yyyymmdd")
            return False
        self.fromDate = parsedFrom
        self.toDate = parsedTo
        print("日期格式正確 from %s to %s" % (fromDate, toDate))
        return True

    # Get the date range
    def getDateRange(self):
        """Return ``(fromDate, toDate)`` or None when either end is unset."""
        if self.fromDate is not None and self.toDate is not None:
            return (self.fromDate, self.toDate)
        return None

    # Parse one p1_data.txt line to obtain co_id, DATE1 and SKEY
    def parseP1DataLine(self, aLine):
        """Extract ``(co_id, DATE1, SKEY)`` from one line of p1_data.txt.

        The line is split on ``|#|#|#|``; the fifth field is a
        ``;``-separated list of ``key=value`` assignments whose values may be
        wrapped in double quotes.

        Raises:
            IndexError: the line has fewer than five fields.
            KeyError: one of the three expected keys is missing.
        """
        bLine = aLine.split("|#|#|#|")[4]
        kvDict = {}
        for cLine in bLine.split(";"):
            # Greedy first group: the split happens at the LAST '=' in the segment.
            m = re.match(r"(.*)=(.*)", cLine)
            if m is not None:
                kvDict[m.group(1)] = m.group(2).strip("\"")
        return (
            kvDict["document.fm_t67sb07.co_id.value"],
            kvDict["document.fm_t67sb07.DATE1.value"],
            kvDict["document.fm_t67sb07.SKEY.value"],
        )

    # Parse temp_data.txt (NameOfFund Buy/Sell NoOfUnits Currency UnitPrice TotalAmount)
    def parseTempData(self):
        """Return the first two lines of temp_data.txt, or None if it is absent.

        The second line is returned unparsed (TODO from the original author).
        """
        if not os.path.exists("temp_data.txt"):
            return None
        # Context manager guarantees the handle is closed even if a read fails.
        with open("temp_data.txt", "r", encoding="utf-8") as tempData:
            nof = tempData.readline().strip("\n")
            others = tempData.readline().strip("\n")  # not parsed yet, TODO
        return (nof, others)

    @staticmethod
    def _buildFormABody(dateRange):
        """Build the form body for the "t146sb10" request from a date range."""
        # form A template(SDATE, EDATE, YEAR1, YEAR2, MONTH1, MONTH2, SDAY, EDAY)
        formA_template = "encodeURIComponent=1&step=2&TYPEK=pub&co_id_1=&SDATE=%s&EDATE=%s&YEAR1=%d&YEAR2=%d&MONTH1=%d&MONTH2=%d&SDAY=%d&EDAY=%d&scope=2&sort=1&rpt=bool_t67sb07&firstin=1"
        fromDate, toDate = dateRange[0], dateRange[1]
        return formA_template % (
            fromDate.strftime("%Y%m%d"),  # SDATE
            toDate.strftime("%Y%m%d"),  # EDATE
            fromDate.year - 1911,  # YEAR1
            toDate.year - 1911,  # YEAR2
            fromDate.month,  # MONTH1
            # NOTE(review): MONTH2 is filled with the end YEAR minus 1911, not
            # the end month. Kept as-is; confirm against what the server expects.
            toDate.year - 1911,  # MONTH2
            fromDate.day,  # SDAY
            toDate.day,  # EDAY
        )

    def _notifyProgress(self):
        """Push the current progress value to every registered observer."""
        for ob in self.progressObserver:
            ob.updateProgress(self.progress)  # observers must implement updateProgress

    # Run the fetch-and-parse procedure
    def runProcess(self):
        """Fetch the listing page, then each detail page, and save the rows.

        Steps: request "t146sb10" for the configured date range and feed the
        response to MopsHtmlParser_1; read p1_data.txt; for every line request
        "t67sb03", feed the response to MopsHtmlParser_2 and add one row to the
        spreadsheet; finally save the spreadsheet. A detail request that raises
        is reported and skipped.

        Raises:
            TypeError: no date range has been set (getDateRange() is None).
        """
        xlsfile = XlwtWrapper()
        dateRange = self.getDateRange()
        formA_body = self._buildFormABody(dateRange)
        res_t146sb10 = self.cli.requestServer("t146sb10", formA_body)
        self.cli.closeConnection()
        parser1 = MopsHtmlParser_1(convert_charrefs=True)
        parser1.feed(res_t146sb10)

        # Per the original author's note, p1_data.txt exists at this point.
        # Read it once; the handle is released before the long-running loop.
        with open("p1_data.txt", "r", encoding="utf-8") as p1file:
            p1Lines = p1file.readlines()
        lines = len(p1Lines)  # total row count, used to compute progress

        # form B template (co_id, DATE1, SKEY)
        formB_template = "encodeURIComponent=1&step=2&TYPEK=pub&co_id=%s&DATE1=%s&SKEY=%s&firstin=1"
        for handledLine, aLine in enumerate(p1Lines, start=1):
            self.progress = int((handledLine / lines) * 100)
            self._notifyProgress()
            if handledLine % 100 == 0:
                time.sleep(randint(30, 60))  # rest 30-60 seconds every 100 rows
            time.sleep(randint(1, 3))  # rest 1-3 seconds before every row
            (co_id, DATE1, SKEY) = self.parseP1DataLine(aLine)
            formB_body = formB_template % (co_id, DATE1, SKEY)
            try:
                res_t67sb03 = self.cli.requestServer("t67sb03", formB_body)
            except Exception:
                # Deliberate best-effort: report the failed row and move on.
                print("t67sb03 連線被拒絕。(略過)")
                continue
            finally:
                # Runs on success and on the skip path alike.
                self.cli.closeConnection()
            parser2 = MopsHtmlParser_2(convert_charrefs=True)
            parser2.feed(res_t67sb03)
            p2_data = parser2.getP2Data()
            p1_data = aLine.split("|#|#|#|")
            xlsfile.addRowData((
                p1_data[2],
                p1_data[1],
                p2_data["B/S"],
                p2_data["nof"],
                p2_data["No. of U"],
                "NA",
                p2_data["Unit Price"],
                p2_data["Total Amount"],
                p2_data["comment"],
            ))
        xlsfile.saveExcelFile()

    # Register a progress observer object
    def registerProgressObserver(self, observer):
        """Add an observer whose updateProgress() is called by runProcess()."""
        self.progressObserver.append(observer)
def setUp(self):
    """Configure INFO-level logging and attach a new Client as ``self.cli``."""
    log_level = logging.INFO
    logging.basicConfig(level=log_level)
    client = Client()
    self.cli = client