示例#1
0
    def run(self):
        """Run the main crawl loop for ``self.ROUND_NUMBER`` rounds.

        Creates the production database and account pool, then for each
        round: crawls for start nodes if the database asks for more, runs
        the multi-threaded crawl with ``self.THREAD_NUMBER`` threads,
        checks for a stop signal, and finally drops and re-establishes the
        router's PPPoE connection.

        The loop ends early if ``Crawler.detectStopSignal()`` raises.
        """
        self.dataBase = createProdDataBase()
        self.renrenAccountPool = createProdRenrenAccountPool()
        for i in range(self.ROUND_NUMBER):
            log.info('>>>>>>>>  Main Crawl Thread Round(%s)  <<<<<<<<' % (i+1))

            if self.dataBase.needMoreStartNode():
                startNodeCrawler = StartNodeCrawler(
                    self.dataBase, self.renrenAccountPool)
                startNodeCrawler.startCrawling()

            self.startMultiThreadCrawling(self.THREAD_NUMBER)
            #self.startMultiThreadCrawlingWithProxy(1)
            #manager.startSignleThreadCrawling()

            try:
                Crawler.detectStopSignal()
            except Exception:
                # Any exception from the stop-signal check ends the loop;
                # record it so the early exit is visible in the log.
                log.info('Stop signal check raised; leaving main crawl loop')
                break

            # NOTE(review): presumably the reconnect is meant to obtain a
            # fresh connection/IP between rounds -- confirm.
            log.info('>>>>>> Router disconnect PPPoE  <<<<<<')
            router.disconnectPPPoE()
            time.sleep(2)
            log.info('>>>>>> Router connect PPPoE  <<<<<<')
            router.connectPPPoE()
            # Wait for the connection being established.
            time.sleep(10)
示例#2
0
def test():
    """Manually exercise the crawler against a freshly re-created schema.

    Configures logging, re-creates the tables, takes one account from the
    production account pool, logs in with it and starts a crawl from a
    hard-coded start id. A ``CrawlerException`` ends the run and is logged.

    Side effect: rebinds the module-level name ``crawler``.
    """
    log.config(GC.LOG_FILE_DIR + 'crawler_test', 'info', 'info')
    db = createConnection()
    # NOTE(review): create/drop/create presumably guarantees that the drop
    # never runs against missing tables and that the crawl starts from an
    # empty schema -- confirm.
    createTables(db)
    dropTables(db)
    createTables(db)

    pool = renrenaccountpool.createProdRenrenAccountPool()
    accounts = pool.getAccounts(1)
    account = accounts[0]

    global crawler

    try:
        crawler = Crawler(db)
        agent = RenrenAgent(account)
        agent.login()
        crawler.setAgent(agent)
        startId = "322601086"
        crawler.crawl(startId, 30)
    except CrawlerException as e:
        log.info("Crawler end, reason: " + str(e))
        if e.errorCode == CrawlerErrorCode.DETECT_STOP_SIGNAL:
            # Single-argument call form prints identically on Python 2 and 3.
            print("detect int signal")
示例#3
0
def importFromFile(fname):
    """Import accounts listed in the text file ``fname``.

    Each line is split on whitespace; the first field is taken as the user
    name and the second as the password. Lines with fewer than two fields
    are skipped.

    NOTE(review): the middle of this function is garbled in the visible
    source (masked credentials, fused lines); the verification/import steps
    and the counters logged at the end are not visible here -- restore them
    from the original file before relying on this code.
    """
    log.config(GC.LOG_FILE_DIR + 'import_accounts', 'info', 'info')
    fileName = fname
    accounts = []
    pool = createProdRenrenAccountPool()

    with open(fileName) as importedFile:
        lines = importedFile.readlines()
        for line in lines:
            strs = line.split()
            if len(strs) < 2:
                continue # May be not a valid account
            username = strs[0] # User name first.
            password = strs[1] # And then password.
            # NOTE(review): the statement below is corrupted in the source
            # (text replaced by "******" and several lines merged); it is
            # not valid Python as shown.
            log.info("Find username: "******"  " +\
                "password: "******"Finish importing..........\n" +\
        "Success on verify accounts number: " +\
        str(verifySuccessCount) + "\n" +\
        "Fail on verify accounts number: " +\
        str(verifyFailCount))
    # NOTE(review): importSuccessCount / importFailCount are not assigned
    # anywhere in the visible part of this function.
    log.info('Success imported number: %s' % importSuccessCount)
    log.info('Fail imported number: %s' % importFailCount)
示例#4
0
 def __init__(self, dataBase=None, accountPool=None):
     """Set up the crawler's storage handles and per-instance state.

     dataBase    -- database object to use; when falsy, one is obtained
                    from createProdDataBase().
     accountPool -- account pool to use; when falsy, one is obtained from
                    createProdRenrenAccountPool().
     """
     # `or` keeps the original truthiness test: the factory is only
     # called when the supplied argument is falsy.
     self.dataBase = dataBase or createProdDataBase()
     self.renrenAccountPool = accountPool or createProdRenrenAccountPool()

     self.dataBase.releaseAllStartNode()

     # Fresh, empty bookkeeping for this instance.
     self.userList, self.shareList = [], []
     self.requestCount = 0
     self.crawledShareSet = set()
示例#5
0
def main():
    """Configure logging, then save the in-use and the invalid accounts.

    Both save helpers receive the same production account pool.
    """
    log.config(GC.LOG_FILE_DIR + 'save_accounts', 'info', 'info')
    accountPool = createProdRenrenAccountPool()
    for saveStep in (saveInUsingAccounts, saveInvalidAccount):
        saveStep(accountPool)