def getAgentWithAccount(self):
    """Try to obtain a logged-in RenrenAgent backed by a pooled account.

    Accounts are taken from ``self.renrenAccountPool`` one at a time and a
    login is attempted with each; the first successful login wins. The loop
    stops early when ``self.accountUsed`` reaches ``self.ACCOUNTS_LIMIT`` or
    when the pool hands back no account, and never makes more than
    ``self.FAIL_LOGIN_ACCOUNT_LIMIT`` attempts.

    Side effects:
        ``self.accountUsed`` is incremented once for every account requested
        from the pool, including a request that returns nothing.
        If a login succeeds, every account that failed earlier in this call
        is reported with ``RenrenAccountErrorCode.ERROR_WHEN_LOGIN``.
        If no login succeeds, the failed accounts are only released through
        ``finishUsing()``.

    Returns:
        ``(agent, account)`` -- the logged-in agent and the account it used,
        or ``(None, None)`` when no login succeeded.
    """
    pool = self.renrenAccountPool
    failedAccounts = []
    account = None
    agent = None

    for _ in range(self.FAIL_LOGIN_ACCOUNT_LIMIT):
        if self.accountUsed >= self.ACCOUNTS_LIMIT:
            # Run out of accounts credit.
            break
        self.accountUsed += 1
        account = pool.getAccount()
        if not account:
            # No available account in the database.
            break
        agent = RenrenAgent(account, self.proxy)
        agent.login()
        # Pause after every login attempt before checking the result.
        time.sleep(1)
        if agent.isLogin:
            # Login success.
            break
        log.warning('Thread %s login fail.' % self.threadId)
        failedAccounts.append(account)

    if agent and agent.isLogin:
        # A dedicated loop variable is required here: iterating with the name
        # ``account`` would overwrite the account that just logged in, and the
        # caller would receive the last *failed* account instead.
        for failedAccount in failedAccounts:
            failedAccount.reportInvalidAccount(
                RenrenAccountErrorCode.ERROR_WHEN_LOGIN)
        return agent, account

    # Nothing logged in: the failed accounts are released but not reported as
    # invalid (presumably because the cause may not be the accounts
    # themselves -- TODO confirm).
    for failedAccount in failedAccounts:
        failedAccount.finishUsing()
    return None, None
def test():
    """Run a sample crawl from a fixed start id.

    Configures logging, rebuilds the tables, takes one account from the
    production account pool, logs in with it and starts crawling. The
    crawler instance is stored in the module-level ``crawler`` global.
    A CrawlerException ends the run and is logged.
    """
    global crawler

    log.config(GC.LOG_FILE_DIR + 'crawler_test', 'info', 'info')

    # create -> drop -> create (presumably so the run starts on empty tables).
    connection = createConnection()
    createTables(connection)
    dropTables(connection)
    createTables(connection)

    accountPool = renrenaccountpool.createProdRenrenAccountPool()
    firstAccount = accountPool.getAccounts(1)[0]

    try:
        crawler = Crawler(connection)
        loggedInAgent = RenrenAgent(firstAccount)
        loggedInAgent.login()
        crawler.setAgent(loggedInAgent)
        startId = "322601086"
        crawler.crawl(startId, 30)
    except CrawlerException as e:
        log.info("Crawler end, reason: " + str(e))
        if e.errorCode == CrawlerErrorCode.DETECT_STOP_SIGNAL:
            print("detect int signal")
            return
def saveInvalidAccount(pool): command = """ SELECT username, password FROM RenrenAccounts WHERE is_valid = 0 ORDER BY last_used_time DESC; """ pool.cursor.execute(command) rows = pool.cursor.fetchall() log.info('Total InValid account:' + str(len(rows))) failCount = 0 for row in rows: username = row[0] password = row[1] saveSuccess = False if pool.onceSaveFail(username, password): continue if failCount > 100: break; try: time.sleep(2) agent = RenrenAgent(username, password) agent.login() saveSuccess = agent.isLogin except Exception, e: log.warning('Save login fail: ' + str(e)) finally:
def verify(self):
    """Attempt a login with ``self.account`` and record the outcome.

    Sets ``self.success`` to True or False according to ``agent.isLogin``
    after the login attempt, logs the result, and increments the module-level
    ``verifySuccessCount`` or ``verifyFailCount`` while holding the
    module-level ``lock``.
    """
    global verifySuccessCount
    global verifyFailCount

    agent = RenrenAgent(self.account)
    agent.login()
    self.success = bool(agent.isLogin)

    # NOTE(review): this writes the account password to the log in plain text.
    log.info('Account Verify result: (%s, %s) >>> %s' %
             (self.account.username, self.account.password, self.success))

    lock.acquire()
    try:
        if self.success:
            verifySuccessCount += 1
        else:
            verifyFailCount += 1
    finally:
        # Always release, so an error while updating the counters cannot
        # leave the lock held.
        lock.release()
def recursiveProfileTest(
        username, password, testInterval, totalCount, startList):
    """Drive recursiveTestGenerator and log each profile it produces.

    For every yielded result without an error code, the profile url is
    logged, the page html is saved through util.saveTestPage, the saved
    path is logged and the info is printed with printInfo.

    The loop ends on the first exception raised inside it. That includes
    the generator simply running out of results, so a normal end is also
    reported through log.error.
    """
    profiles = recursiveTestGenerator(
        username, password, testInterval, totalCount, startList)

    while True:
        try:
            userId, userInfo, errorCode = next(profiles)
            if errorCode:
                # Nothing to report for a failed profile.
                continue
            log.info('Profile url: ' + RenrenAgent.getProfileUrl(userId))
            localPath = util.saveTestPage(userInfo.html, userId)
            log.info('Profile local path: file://' + localPath)
            printInfo(userInfo)
        except Exception as e:
            log.error('Error happen or end: ' + str(e))
            break
def recursiveTestGenerator(
        username, password, testInterval, totalCount, startList):
    """A recursive test generator.

    Start from a list of user id, and get the profile of these id, of their
    friends and recent visitors, and so on breadth-first. Every time it gets
    a user profile successfully, it yields (id, UserInfo, ErrorCode); ids
    whose profile request reports an error are logged and skipped, so the
    yielded ErrorCode is always falsy.

    The generator stops when the queue is empty or after ``totalCount``
    profiles have been yielded. Ids are not de-duplicated, so an id reachable
    from several profiles may be requested more than once.

    Args:
        @username {string} the user name of the agent.
        @password {string} the password of the agent.
        @testInterval {float} the interval time between every request.
            (Only applied after a successful request; a failed request moves
            straight on to the next id.)
        @totalCount {integer} total number of profile to get.
        @startList {List} a list of user id to start test.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    agent = RenrenAgent(username, password)
    loginInfo, loginError = agent.login()
    if loginError:
        # NOTE(review): crawling continues even when login reports an error
        # -- confirm this is intended.
        log.error('Login error(username, password): ' +
                  username + ', ' + password)
    else:
        log.info(loginInfo['name'])
        log.info(loginInfo['href'])

    count = 1
    # FIFO queue of (user id, referrer id) pairs; start ids have no referrer.
    # A deque gives O(1) removal from the front, where re-slicing a list
    # copied the whole remainder on every step.
    pending = deque((startId, None) for startId in startList)

    while pending:
        # Get the element to do request.
        userId, referId = pending.popleft()
        log.info('processing(' + str(count) + '): ' + userId)
        info, errorCode = agent.getProfile(userId)

        # Error handle
        if errorCode:
            if referId:
                log.warning('Error happen when getProfile. Refer id: ' +
                            str(referId) + '. Refer page url: ' +
                            agent.getProfileUrl(str(referId)))
            else:
                log.warning('Error happen when getProfile, no refer id.')
            continue

        # Yield result
        yield (userId, info, errorCode)

        # Result handle: only grow the queue while it holds fewer entries
        # than the number of profiles still wanted.
        if len(pending) < totalCount - count:
            for neighbours in (info.friendList, info.recentVisitedList):
                if neighbours:
                    pending.extend(
                        (otherId, userId) for otherId in neighbours)

        # Acc
        count += 1
        if count > totalCount:
            return
        time.sleep(testInterval)
# Constructor: performs no setup of its own and delegates straight to
# RenrenAgent.__init__ with the given account (presumably the enclosing
# class derives from RenrenAgent -- the class header is not visible here).
def __init__(self, account): RenrenAgent.__init__(self, account)