예제 #1
0
 def createNewSubIE(self, subIdx):
     """Open the link of sub node ``subIdx`` in a new IE window.

     Reads the ``href`` attribute of ``self.subNodes[subIdx]``, opens it in
     a freshly created ``IEExplorer``, calls ``setVisible(1)`` on it and
     appends the explorer to ``self.subIESet``.
     """
     href = self.subNodes[subIdx].getAttribute("href")
     explorer = IEExplorer()
     explorer.openURL(href)
     explorer.setVisible(1)
     self.subIESet.append(explorer)
예제 #2
0
def test_TaobaoBaobeiViewer():
    """Manual smoke test: open a fixed Tmall item page and drive the viewer.

    Creates an ``IEExplorer`` on a hard-coded item URL, wraps it in a
    ``TaobaoBaobeiViewer`` and calls ``baobeiSrcollBeg()`` followed by
    ``openCurBaobei()``. Nothing is asserted.
    """
    itemUrl = u"http://detail.tmall.com/item.htm?id=20440371673&spm=a230r.1.14.1.0YUkKR&ad_id=&am_id=&cm_id=140105335569ed55e27b&pm_id="
    explorer = IEExplorer()
    explorer.openURL(itemUrl)
    explorer.setVisible(1)
    viewer = TaobaoBaobeiViewer(explorer)
    viewer.baobeiSrcollBeg()
    viewer.openCurBaobei()
예제 #3
0
    def doSearch(self):
        """Search Taobao for ``self.searchKey`` and then look for the target.

        Steps: open the Taobao home page in a new ``IEExplorer``, type the
        search key into the search box, click the submit button, fetch the
        target page title via ``getTargetUrlInfo()``, read the paging info
        via ``getPageInfo()`` and hand over to ``doFindTargetBaobei()``.

        Raises:
            ValueError: propagated from ``getSearchUnputNode()`` /
                ``getSearchButtonNode()`` when the search controls are
                not found.
        """
        # open taobao
        url = u"http://www.taobao.com/"
        self.searchPageIE = IEExplorer()
        self.searchPageIE.openURL(url)
        self.searchPageIE.setVisible(1)
        # NOTE(review): waitBusy()'s return type is not visible here, so the
        # explicit comparison with True is kept as written.
        while self.searchPageIE.waitBusy(IE_TIME_OUT_NEW_PAGE) == True:
            self.searchPageIE.stop()
            time.sleep(0.1)

        # input search key
        nodeSearchInput = self.getSearchUnputNode()
        nodeSearchInput.click()
        nodeSearchInput.focus()
        logging.debug(self.searchKey)
        enumHumanInput(nodeSearchInput, self.searchKey)

        # search
        nodeSearchButton = self.getSearchButtonNode()
        nodeSearchButton.focus()
        nodeSearchButton.click()
        time.sleep(3)
        self.getTargetUrlInfo()
        while self.searchPageIE.waitBusy(IE_TIME_OUT_NEW_PAGE) == True:
            # locationURL is a method (it is called as locationURL() elsewhere
            # in this file); comparing the bound method itself to a string
            # was always False, so the extra wait below never happened.
            if self.searchPageIE.locationURL() == url:
                # Still on the home page: give the navigation more time
                # before stopping the load.
                time.sleep(3)
            self.searchPageIE.stop()
            time.sleep(0.1)

        self.curPageIdx, self.totalPageCount = self.getPageInfo()
        dbgInfo = u"Total page num: {0}".format(self.totalPageCount)
        logging.debug(dbgInfo)
        # find the target
        self.doFindTargetBaobei()
예제 #4
0
def test_refresh():
    """Manual check: open Taobao, wait 20 seconds, then refresh the page.

    Prints a marker before and after the sleep; nothing is asserted.
    """
    explorer = IEExplorer()
    explorer.openURL(u"www.taobao.com")
    explorer.setVisible(1)
    # Parenthesised form prints the same text under Python 2 and Python 3.
    print("before sleep")
    time.sleep(20)
    print("after sleep")
    explorer.getIE().Refresh()
예제 #5
0
    def doFindTargetBaobei(self):
        """Page through the search results until the target is found, then view it.

        For every result page the items are collected
        (``getCurPageSearchItem``) and random items may be viewed
        (``procViewRandomBaobei``). Once the target is on the current page,
        its link is opened in a new ``IEExplorer`` wrapped in a
        ``TaobaoBaobeiViewer`` (stored in ``self.targetViewer``), and the
        method sleeps so that the total time spent on the item page is a
        random value between ``TIME_BAOBEI_VIEW_MIN`` and
        ``TIME_BAOBEI_VIEW_MAX`` seconds.

        Raises:
            ValueError: if the target is not on the current page and there
                is no next-page link to follow.
        """
        # page next loop
        while True:
            self.getCurPageSearchItem()
            self.procViewRandomBaobei()
            if self.isTargetInThisPage():
                break

            nextPageNode = self.getNextPageNode()
            if nextPageNode is None:
                # Without this guard the code below failed with an opaque
                # AttributeError on None once the last page was exhausted.
                errInfo = u"Target baobei not found, no next page after page index {0}".format(self.curPageIdx)
                logging.error(errInfo)
                raise ValueError(errInfo)
            # Only advance the page index once we know there is a next page.
            self.curPageIdx += 1
            self.searchPageIE.scrollToNode(nextPageNode)
            nextPageNode.focus()
            nextPageNode.click()
            time.sleep(4)

        dbgInfo = u"randomBaobei: " + str2unicode( str(self.randomBaobei) )
        logging.debug(dbgInfo)
        # go to the target baobei page
        targetNode = self.allSearchPages[self.curPageIdx][self.curPageInnerIdx]
        rcd = SearchRecord(targetNode)
        summaryNode = rcd.getSummaryNode()
        self.searchPageIE.scrollToNode(summaryNode)
        summaryNode.focus()
        url = summaryNode.getAttribute(u"href")
        baobeiIE = IEExplorer()
        baobeiIE.openURL(url)
        baobeiIE.setVisible(1)
        self.targetViewer = TaobaoBaobeiViewer(baobeiIE)
        self.targetViewer.baobeiSrcollBeg()
        self.targetViewer.openCurBaobei()

        # stay in baobei page
        timeTotal = random.randint(TIME_BAOBEI_VIEW_MIN, TIME_BAOBEI_VIEW_MAX)
        timeBeg = self.targetViewer.getTimeBegOp()
        timeNow = datetime.datetime.now()
        timePass = (timeNow-timeBeg).seconds
        timeSleep = timeTotal - timePass
        if timeSleep <= 0:
            # The viewer already used up the budget: still linger 10 seconds.
            timeSleep = 10
        dbgInfo = u"stay in baobei page time: " + str2unicode(str(timeSleep))
        logging.debug(dbgInfo)
        time.sleep(timeSleep)
예제 #6
0
    def viewRandomBaobei(self, randomIdx):
        """Open item ``randomIdx`` of the current result page in a new IE window.

        The item's summary link is scrolled into view and focused, its
        ``href`` is opened in a fresh ``IEExplorer``, and the method stays
        on that page for a random 3-5 (value passed to ``stayInSubPage``).
        Afterwards the search page is brought back to the foreground.

        Args:
            randomIdx: index into ``self.allSearchPages[self.curPageIdx]``.
        """
        randomNode = self.allSearchPages[self.curPageIdx][randomIdx]
        rcd = SearchRecord(randomNode)
        summaryNode = rcd.getSummaryNode()
        self.searchPageIE.scrollToNode(summaryNode)
        summaryNode.focus()
        url = summaryNode.getAttribute(u"href")
        baobeiIE = IEExplorer()
        baobeiIE.openURL(url)
        baobeiIE.setVisible(1)
        baobeiIE.waitReadyState(IE_TIME_OUT_NEW_PAGE)
        timeOut = random.randint(3, 5)
        baobeiIE.stayInSubPage(timeOut)

        # set the search page into top
        while self.searchPageIE.waitBusy(IE_TIME_OUT_NEW_PAGE) == True:
            # Stop the window we are actually waiting on. The previous code
            # stopped self.targetViewer's IE, but targetViewer is still None
            # while result pages are being scanned, so a busy search page
            # raised AttributeError here.
            self.searchPageIE.stop()
            time.sleep(0.1)
        self.searchPageIE.waitReadyState(IE_TIME_OUT_NEW_PAGE)
        self.searchPageIE.setForeground()
        time.sleep(1)
        # (maximising the window via resizeMax() is currently disabled)
        time.sleep(1)
예제 #7
0
class BaobeiSearher(object):
    """Drive an IE window to search Taobao for a key word and find a target item.

    ``doSearch()`` is the entry point: it opens Taobao, submits the search,
    pages through the results until the item whose ID and title match the
    target URL is found, and finally opens that item in its own window.
    Along the way some other result items are opened at random.
    """

    def __init__(self, searchKey, targetUrl):
        """Store the search key / target URL and reset the search state.

        Args:
            searchKey: text typed into the Taobao search box.
            targetUrl: URL of the item page to look for in the results.
        """
        self.searchKey = searchKey
        self.targetUrl = targetUrl
        self.targetID = getTBIDFromUrl(self.targetUrl)
        # <title> markup of the target page, filled by getTargetUrlInfo().
        self.targetTitle = None
        # TaobaoBaobeiViewer of the target item, set by doFindTargetBaobei().
        self.targetViewer = None
        # IEExplorer showing the search results, set by doSearch().
        self.searchPageIE = None
        self.curPageIdx = 0
        self.totalPageCount = 0
        # Index of the target inside its result page; -1 until it is found.
        self.curPageInnerIdx = -1
        # One list of item nodes per visited result page.
        self.allSearchPages = []
        # (pageIdx, innerIdx) tuples that must not be picked at random again.
        self.randomBaobei = []

    def _waitSearchPageIdle(self, timeOut):
        """Stop the search page's load until waitBusy() stops reporting busy."""
        # NOTE(review): waitBusy()'s return type is not visible here, so the
        # explicit comparison with True is kept as written.
        while self.searchPageIE.waitBusy(timeOut) == True:
            self.searchPageIE.stop()
            time.sleep(0.1)

    def doSearch(self):
        """Search Taobao for ``self.searchKey`` and then look for the target.

        Raises:
            ValueError: when the search box or submit button is not found,
                or when the result pages run out before the target is seen.
        """
        # open taobao
        url = u"http://www.taobao.com/"
        self.searchPageIE = IEExplorer()
        self.searchPageIE.openURL(url)
        self.searchPageIE.setVisible(1)
        self._waitSearchPageIdle(IE_TIME_OUT_NEW_PAGE)

        # input search key
        nodeSearchInput = self.getSearchUnputNode()
        nodeSearchInput.click()
        nodeSearchInput.focus()
        logging.debug(self.searchKey)
        enumHumanInput(nodeSearchInput, self.searchKey)

        # search
        nodeSearchButton = self.getSearchButtonNode()
        nodeSearchButton.focus()
        nodeSearchButton.click()
        time.sleep(3)
        self.getTargetUrlInfo()
        while self.searchPageIE.waitBusy(IE_TIME_OUT_NEW_PAGE) == True:
            # locationURL is a method (see getCurPageSearchItem); comparing
            # the bound method itself to a string was always False.
            if self.searchPageIE.locationURL() == url:
                # Still on the home page: give the navigation more time.
                time.sleep(3)
            self.searchPageIE.stop()
            time.sleep(0.1)

        self.curPageIdx, self.totalPageCount = self.getPageInfo()
        dbgInfo = u"Total page num: {0}".format(self.totalPageCount)
        logging.debug(dbgInfo)
        # find the target
        self.doFindTargetBaobei()

    def getPageInfo(self):
        """Read the current page index and the page count from the result page.

        Returns:
            A tuple ``(index, total)``. ``total`` is the number after the
            "/" in the text adjacent to the "page-info" span; ``index`` is
            the number next to the span's single ``<strong>`` child minus
            one, or 0 when there is not exactly one such child. ``(0, 1)``
            is returned when the page does not contain exactly one
            "page-info" span.
        """
        for delta in getScrollDelta():
            self.searchPageIE.getWindow().scrollBy(0, delta)
        self._waitSearchPageIdle(10)

        body = self.searchPageIE.getBody()
        nodesPageinfo = [node for node in getSubNodesByTag(body, u"span")
                         if node.className == u"page-info"]
        if len(nodesPageinfo) != 1:
            return 0, 1
        nodePage = nodesPageinfo[0]

        # Positions accepted by getAdjacentText(), tried in this order.
        allTextPos = (u"beforeBegin", u"afterBegin", u"beforeEnd", u"afterEnd")
        pageStr = u""
        for textPos in allTextPos:
            pageStr = nodePage.getAdjacentText(textPos)
            if u"/" in pageStr:
                break
        dbgInfo = u"pageStr: " + str2unicode(pageStr)
        logging.debug(dbgInfo)
        total = int(pageStr.split(u"/")[1])

        nodesStrong = getSubNodesByTag(nodePage, u"strong")
        if len(nodesStrong) != 1:
            index = 0
        else:
            nodePageIndex = nodesStrong[0]
            for textPos in allTextPos:
                pageStr = nodePageIndex.getAdjacentText(textPos)
                if len(pageStr) != 0:
                    break
            index = int(pageStr) - 1

        return index, total

    def doFindTargetBaobei(self):
        """Page through the results until the target is found, then view it.

        Once the target is on the current page its link is opened in a new
        ``IEExplorer`` wrapped in a ``TaobaoBaobeiViewer`` (stored in
        ``self.targetViewer``), and the method sleeps so that the total time
        on the item page is a random value between ``TIME_BAOBEI_VIEW_MIN``
        and ``TIME_BAOBEI_VIEW_MAX`` seconds.

        Raises:
            ValueError: if the target is not on the current page and there
                is no next-page link to follow.
        """
        # page next loop
        while True:
            self.getCurPageSearchItem()
            self.procViewRandomBaobei()
            if self.isTargetInThisPage():
                break

            nextPageNode = self.getNextPageNode()
            if nextPageNode is None:
                # Without this guard the code below failed with an opaque
                # AttributeError on None once the last page was exhausted.
                errInfo = u"Target baobei not found, no next page after page index {0}".format(self.curPageIdx)
                logging.error(errInfo)
                raise ValueError(errInfo)
            # Only advance the page index once we know there is a next page.
            self.curPageIdx += 1
            self.searchPageIE.scrollToNode(nextPageNode)
            nextPageNode.focus()
            nextPageNode.click()
            time.sleep(4)

        dbgInfo = u"randomBaobei: " + str2unicode( str(self.randomBaobei) )
        logging.debug(dbgInfo)
        # go to the target baobei page
        targetNode = self.allSearchPages[self.curPageIdx][self.curPageInnerIdx]
        rcd = SearchRecord(targetNode)
        summaryNode = rcd.getSummaryNode()
        self.searchPageIE.scrollToNode(summaryNode)
        summaryNode.focus()
        url = summaryNode.getAttribute(u"href")
        baobeiIE = IEExplorer()
        baobeiIE.openURL(url)
        baobeiIE.setVisible(1)
        self.targetViewer = TaobaoBaobeiViewer(baobeiIE)
        self.targetViewer.baobeiSrcollBeg()
        self.targetViewer.openCurBaobei()

        # stay in baobei page
        timeTotal = random.randint(TIME_BAOBEI_VIEW_MIN, TIME_BAOBEI_VIEW_MAX)
        timeBeg = self.targetViewer.getTimeBegOp()
        timeNow = datetime.datetime.now()
        timePass = (timeNow-timeBeg).seconds
        timeSleep = timeTotal - timePass
        if timeSleep <= 0:
            # The viewer already used up the budget: still linger 10 seconds.
            timeSleep = 10
        dbgInfo = u"stay in baobei page time: " + str2unicode(str(timeSleep))
        logging.debug(dbgInfo)
        time.sleep(timeSleep)

    def viewRandomBaobei(self, randomIdx):
        """Open item ``randomIdx`` of the current result page in a new IE window.

        The item's summary link is scrolled into view and focused, its
        ``href`` is opened in a fresh ``IEExplorer``, and the method stays
        on that page for a random 3-5 (value passed to ``stayInSubPage``).
        Afterwards the search page is brought back to the foreground.

        Args:
            randomIdx: index into ``self.allSearchPages[self.curPageIdx]``.
        """
        randomNode = self.allSearchPages[self.curPageIdx][randomIdx]
        rcd = SearchRecord(randomNode)
        summaryNode = rcd.getSummaryNode()
        self.searchPageIE.scrollToNode(summaryNode)
        summaryNode.focus()
        url = summaryNode.getAttribute(u"href")
        baobeiIE = IEExplorer()
        baobeiIE.openURL(url)
        baobeiIE.setVisible(1)
        baobeiIE.waitReadyState(IE_TIME_OUT_NEW_PAGE)
        timeOut = random.randint(3, 5)
        baobeiIE.stayInSubPage(timeOut)

        # set the search page into top
        # The previous code stopped self.targetViewer's IE inside this wait,
        # but targetViewer is still None while result pages are scanned, so
        # a busy search page raised AttributeError. Stop the search page,
        # which is the window actually being waited on.
        self._waitSearchPageIdle(IE_TIME_OUT_NEW_PAGE)
        self.searchPageIE.waitReadyState(IE_TIME_OUT_NEW_PAGE)
        self.searchPageIE.setForeground()
        time.sleep(1)
        # (maximising the window via resizeMax() is currently disabled)
        time.sleep(1)

    def procViewRandomBaobei(self):
        """View random items of the current result page.

        If the target is on this page, its position is first recorded in
        ``self.randomBaobei`` so it is never chosen as a random item.
        Pages with fewer than three items are skipped. One random item is
        viewed on page index 0, and one more on the page that contains the
        target (so two when the target is on page index 0).
        """
        if self.isTargetInThisPage():
            self.randomBaobei.append((self.curPageIdx, self.curPageInnerIdx))
        if len(self.allSearchPages[-1]) < 3:
            return

        if self.curPageIdx == 0:
            # first result page: always view one random baobei
            self._viewUnseenRandomBaobei()

        if self.isTargetInThisPage():
            # page holding the target: view one more random baobei
            self._viewUnseenRandomBaobei()

    def _viewUnseenRandomBaobei(self):
        """Pick a not-yet-used random item of the latest page and view it."""
        itemCount = len(self.allSearchPages[-1])
        while True:
            innerIdx = random.randint(0, itemCount - 1)
            toupleIdx = (self.curPageIdx, innerIdx)
            if toupleIdx not in self.randomBaobei:
                break
        self.randomBaobei.append(toupleIdx)
        self.viewRandomBaobei(innerIdx)

    def isTargetInThisPage(self):
        """Return True once the target's index inside a page has been recorded."""
        return self.curPageInnerIdx != -1

    def getCurPageSearchItem(self):
        """Collect the item nodes of the current result page and spot the target.

        The "item-box" divs with at least five child nodes are stored in
        ``self.allSearchPages[self.curPageIdx]``. When one of them matches
        the target (``isRecordTarget``), its index is stored in
        ``self.curPageInnerIdx``. Errors while inspecting a single item are
        logged and do not abort the scan.
        """
        nodesItem = []
        self.allSearchPages.append(nodesItem)
        self.refreshOutAllItem()
        self._waitSearchPageIdle(IE_TIME_OUT_NEW_PAGE)

        # get all item node
        strDbgInfo = u"Page:" + str(self.curPageIdx) + u", locationURL: " + self.searchPageIE.locationURL()
        logging.debug(strDbgInfo)
        body = self.searchPageIE.getBody()
        for node in getSubNodesByTag(body, u"div"):
            if node.className == u"item-box" and node.childNodes.length >= 5:
                nodesItem.append(node)
        strDbgInfo = u"cur page item count: " + str(len(nodesItem))
        logging.debug(strDbgInfo)

        # get the target node
        self.allSearchPages[self.curPageIdx] = nodesItem
        for i, node in enumerate(nodesItem):
            try:
                rcd = SearchRecord(node)
                dbgInfo = str2unicode(str(i)) + u":" + rcd.getSummaryStr() + rcd.getSummaryID()
                logging.debug(dbgInfo)
                if self.isRecordTarget(rcd):
                    self.curPageInnerIdx = i
            except Exception:
                # Best effort: one unreadable item must not abort the scan.
                logging.error(traceback.format_exc())

    def isRecordTarget(self, rcd):
        """Tell whether search record ``rcd`` is the target item.

        Args:
            rcd: object offering ``getSummaryStr()`` and ``getSummaryID()``.

        Returns:
            True when ``"<title>" + summary`` occurs in ``self.targetTitle``
            and the record's ID equals ``self.targetID``; False otherwise.
        """
        titleBeg = u"<title>"
        if (titleBeg + rcd.getSummaryStr()) not in self.targetTitle:
            return False
        return rcd.getSummaryID() == self.targetID

    def refreshOutAllItem(self):
        """Scroll through the result page (up to three rounds) so it is fully loaded.

        After each scroll round the next-page link is looked up. On a page
        other than the last one, a missing link triggers a browser refresh
        and another round; a found link ends the loop. On the last page
        the loop ends as soon as no next-page link is present.
        """
        for _ in range(3):
            for delta in getScrollDelta():
                self.searchPageIE.getWindow().scrollBy(0, delta)
            time.sleep(IE_INTERVAL_TIME_SACROLL)

            self._waitSearchPageIdle(10)

            nextPageNode = self.getNextPageNode()
            if self.curPageIdx == self.totalPageCount - 1:
                if nextPageNode is None:
                    break
            elif nextPageNode is None:
                self.searchPageIE.getIE().Refresh()
                time.sleep(4)
            else:
                break

    def getNextPageNode(self):
        """Return the "next page" link of the result page, or None if absent.

        Raises:
            ValueError: when more than one matching link is found.
        """
        body = self.searchPageIE.getBody()
        nodesNextPage = [node for node in getSubNodesByTag(body, u"a")
                         if node.className == u"page-next"
                         and node.getAttribute(u"title") == u"下一页"]
        num = len(nodesNextPage)
        if num == 0:
            return None
        if num == 1:
            return nodesNextPage[0]
        strDbg = u"num of next page button: " + str(num)
        logging.error(strDbg)
        raise ValueError(strDbg)

    def getTargetUrlInfo(self):
        """Download the target page and store its <title> element as text."""
        response = urllib.urlopen(self.targetUrl)
        try:
            content = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
        soup = BeautifulSoup(content)
        self.targetTitle = str2unicode(str(soup.find(u'title')))

    def getSearchUnputNode(self):
        """Return the search input element (id "q") of the search page.

        Raises:
            ValueError: when no such input element exists.
        """
        body = self.searchPageIE.getBody()
        nodesInput = getSubNodesByTag(body, u"input")
        nodeSearchInput = getNodeByAttr(nodesInput, u"id", u"q")
        if nodeSearchInput is None:
            raise ValueError(u"Can't find the input edit")
        return nodeSearchInput

    def getSearchButtonNode(self):
        """Return the first submit button whose ``tabIndex`` attribute is 0.

        Raises:
            ValueError: when no such button exists.
        """
        body = self.searchPageIE.getBody()
        for node in getSubNodesByTag(body, u"button"):
            try:
                if node.getAttribute(u"type") != u"submit":
                    continue
                tabIndex = node.getAttribute(u"tabIndex")
            except Exception:
                # Attribute access failed on this node: treat as no match.
                continue
            if tabIndex == 0:
                return node

        raise ValueError(u"Can't find the submit button")