Exemplo n.º 1
0
def test():
    """Run a crawl from a fixed start id against freshly rebuilt tables.

    Configures logging, rebuilds the tables, takes the first account
    returned by the pool from ``createProdRenrenAccountPool``, logs in with
    it and starts crawling.  The crawler instance is stored in the
    module-level ``crawler`` global.

    A ``CrawlerException`` is logged; when its error code is
    ``DETECT_STOP_SIGNAL`` a short notice is also printed before returning.
    """
    global crawler

    log.config(GC.LOG_FILE_DIR + 'crawler_test', 'info', 'info')
    db = createConnection()
    # NOTE(review): create/drop/create presumably guarantees empty tables
    # even when stale ones already exist -- confirm against createTables.
    createTables(db)
    dropTables(db)
    createTables(db)

    pool = renrenaccountpool.createProdRenrenAccountPool()
    account = pool.getAccounts(1)[0]

    try:
        crawler = Crawler(db)
        agent = RenrenAgent(account)
        agent.login()
        crawler.setAgent(agent)
        # Named startId rather than id so the builtin is not shadowed.
        startId = "322601086"
        crawler.crawl(startId, 30)
    except CrawlerException as e:  # "as" form parses on Python 2.6+ and 3.
        log.info("Crawler end, reason: " + str(e))
        if e.errorCode == CrawlerErrorCode.DETECT_STOP_SIGNAL:
            # Single-argument call form prints identically on Python 2 and 3.
            print("detect int signal")
            return
Exemplo n.º 2
0
def main():
    """Set the default encoding to UTF-8, configure logging, run the importer."""
    # Python 2 only: setdefaultencoding is reachable again after reload(sys).
    reload(sys)
    sys.setdefaultencoding("utf-8")

    logBase = GC.LOG_FILE_DIR + 'import_proxies'
    # Pass 'debug', 'debug' here instead for verbose logging.
    log.config(logBase, 'info', 'info')

    ProxyImporter().start()
Exemplo n.º 3
0
def main():
    """Import start-node ids from ``tools/data/start_nodes`` into the database.

    The first whitespace-separated token of every non-blank line is taken
    as a start-node id and passed to ``insertIntoStartList``.  Once the file
    is exhausted, the numbers of successful and failed inserts are logged.
    """
    log.config(GC.LOG_FILE_DIR + "import_start_nodes", "info", "info")
    fileName = "tools/data/start_nodes"
    importCount = 0
    failCount = 0
    dataBase = database.createProdDataBase()
    with open(fileName) as importedFile:
        # Iterate the file object directly so the whole file is never held
        # in memory at once.
        for line in importedFile:
            fields = line.split()
            if not fields:
                continue  # Blank line: there is no id to import.
            # Named nodeId rather than id so the builtin is not shadowed.
            nodeId = fields[0]
            log.info("Importing start node: " + nodeId)
            if dataBase.insertIntoStartList(nodeId):
                importCount += 1
            else:
                failCount += 1

    log.info(
        "Finish importing..........\n"
        + "Total imported start nodes number: "
        + str(importCount)
        + "\n"
        + "Fail start nodes number: "
        + str(failCount)
    )
Exemplo n.º 4
0
def main():
    """Start the crawl manager after the delay given by the 'waiting_time' flag.

    The flag value is multiplied by 60 before sleeping, i.e. it is treated
    as a number of minutes.
    """
    flag.processArguments()
    log.config(GC.LOG_FILE_DIR + 'CrawlManager', 'info', 'info')
    signal.signal(signal.SIGINT, detectSignal)

    delayMinutes = flag.getFlag('waiting_time')
    log.info('Wait for: ' + str(delayMinutes) + ' minutes')
    delaySeconds = delayMinutes * 60
    time.sleep(delaySeconds)

    CrawlManager().start()
Exemplo n.º 5
0
def main():
    """Run the recursive profile test from a fixed list of start ids."""
    # Quieter alternative:
    # log.config(GC.LOG_FILE_DIR + 'agent_test', 'info', 'info')
    log.config(GC.LOG_FILE_DIR + 'agent_test', 'debug', 'debug')

    seedIds = ['255617816', '45516', '200656024', '601630763']

    # Alternative entry point:
    # runTest('*****@*****.**', '12345678', 1, 200, seedIds)
    recursiveProfileTest('*****@*****.**', '12345678', 1, 20, seedIds)
Exemplo n.º 6
0
def main():
    """Run the database tests against freshly rebuilt tables.

    The connection is closed in a ``finally`` clause so that a failing
    table operation or test does not leak it.  The success message is only
    logged when every step completed without raising.
    """
    log.config(GC.LOG_FILE_DIR + 'database_test', 'debug', 'debug')
    db = createConnection()
    try:
        createTables(db)
        dropTables(db)
        createTables(db)
        test(db)
        testStartList(db)
        dropTables(db)
    finally:
        # Release the connection on both the success and the failure path.
        db.close()
    log.info("Pass the test!")
Exemplo n.º 7
0
def mainProfileTest():
    """Run getProfileTest on every stored static page, then on a fixed id list."""
    log.config(GC.LOG_FILE_DIR + 'agent_test', 'debug', 'debug')
    agent = getTestAgent()
    agent.login()

    # First pass: every file below the static-page directory, with no id.
    for dirPath, _dirNames, fileNames in os.walk(GC.TEST_STATIC_PAGE_PATH):
        for fileName in fileNames:
            pagePath = os.path.join(dirPath, fileName)
            getProfileTest(agent, None, pagePath)

    # Second pass: each id, with no file path.
    for profileId in ['67922197', '172442794', '344429329']:
        getProfileTest(agent, profileId)
Exemplo n.º 8
0
def main():
    """Run the account-pool and proxy-pool tests on freshly rebuilt tables."""
    logBase = GC.LOG_FILE_DIR + 'account_pool_test'
    log.config(logBase, 'debug', 'debug')

    testPool = createTestRenrenAccountPool()
    # Create, drop, create again before the tests run.
    createTables(testPool)
    dropTables(testPool)
    createTables(testPool)

    testRenrenAccountPool()
    testProxyPool()

    # Remove the tables again once both tests have returned.
    dropTables(testPool)
    log.info("Pass the test!")
Exemplo n.º 9
0
def importFromFile(fname):
    """Read username/password pairs from the file *fname* and log the totals.

    Each line is split on whitespace; the first token is the user name and
    the second the password.  Lines with fewer than two tokens are skipped.

    NOTE(review): the middle of this function is missing from the visible
    source (see the note in the loop below), so how accounts are verified
    and imported cannot be documented from here.
    """
    log.config(GC.LOG_FILE_DIR + 'import_accounts', 'info', 'info')
    fileName = fname
    accounts = []
    pool = createProdRenrenAccountPool()

    with open(fileName) as importedFile:
        lines = importedFile.readlines()
        for line in lines:
            strs = line.split()
            if len(strs) < 2:
                continue # Fewer than two tokens: may not be a valid account.
            username = strs[0] # User name first.
            password = strs[1] # And then password.
            # NOTE(review): the statement below is garbled -- a credential
            # scrubber appears to have replaced everything between the
            # "Find username" log call and the final summary log call with
            # asterisks.  The counters used further down
            # (verifySuccessCount, verifyFailCount, importSuccessCount,
            # importFailCount) are presumably defined in the lost span.
            # Restore from the original file before running.
            log.info("Find username: "******"  " +\
                "password: "******"Finish importing..........\n" +\
        "Success on verify accounts number: " +\
        str(verifySuccessCount) + "\n" +\
        "Fail on verify accounts number: " +\
        str(verifyFailCount))
    log.info('Success imported number: %s' % importSuccessCount)
    log.info('Fail imported number: %s' % importFailCount)
Exemplo n.º 10
0
def main():
    """Run both analysed-database import tests with debug logging."""
    logBase = GC.LOG_FILE_DIR + "analysed_data_base_test"
    log.config(logBase, "debug", "debug")

    # Import from a result object first, then from a file.
    testAanlysedDataBaseImportFromResult()
    testAanlysedDataBaseImportFromFile()

    log.info("Pass the all the test!")
Exemplo n.º 11
0
            self.result.xingMap[xing] = info
        self.setInfo(info, profile)
    
    def accumulateMing(self, ming, profile):
        """Count one ``ming`` occurrence and fold *profile* into its entry.

        Increments the global ``ming_count``, looks *ming* up in the result's
        ``mingMap`` (creating and storing a fresh item via ``NameHelper``
        when the lookup yields nothing truthy) and passes the item to
        ``setInfo`` together with *profile*.
        """
        result = self.result
        result.globalInfo.ming_count += 1

        table = result.mingMap
        entry = table.get(ming)
        if not entry:
            # First sighting of this key: start from an initialised item.
            entry = table[ming] = NameHelper.getInitedRawNameItemInfo(ming)
        self.setInfo(entry, profile)

    def accumulateXingMing(self, xingMing, profile):
        """Count one ``xingMing`` occurrence and fold *profile* into its entry.

        Increments the global ``xing_ming_count``, looks *xingMing* up in the
        result's ``xingMingMap`` (creating and storing a fresh item via
        ``NameHelper`` when the lookup yields nothing truthy) and passes the
        item to ``setInfo`` together with *profile*.
        """
        result = self.result
        result.globalInfo.xing_ming_count += 1

        table = result.xingMingMap
        entry = table.get(xingMing)
        if not entry:
            # First sighting of this key: start from an initialised item.
            entry = table[xingMing] = NameHelper.getInitedRawNameItemInfo(xingMing)
        self.setInfo(entry, profile)

def main():
    """Run the analysis and then build the index on the same Analyser."""
    nameAnalyser = Analyser()
    nameAnalyser.analyse()
    # The index is built only after analyse() has returned.
    nameAnalyser.buildIndex()

# Script entry point: process the flags and configure 'info'-level logging
# before handing over to main().
if __name__ == "__main__":
    flag.processArguments()
    log.config(GC.LOG_FILE_DIR + 'Analyser', 'info', 'info')
    main()
Exemplo n.º 12
0
def main():
    """Run the result serialization test with debug logging."""
    logBase = GC.LOG_FILE_DIR + 'result_test'
    log.config(logBase, 'debug', 'debug')

    testResultSerialization()
    # Reached only when the test above returned without raising.
    log.info("Pass the test!")
Exemplo n.º 13
0
def main():
    """Configure 'info'-level logging and import the result into the database."""
    logBase = GC.LOG_FILE_DIR + 'import_result_to_database'
    log.config(logBase, 'info', 'info')
    importResultToDataBase()
Exemplo n.º 14
0
def main():
    """Configure debug logging and start a StartNodeCrawler."""
    logBase = GC.LOG_FILE_DIR + 'crawl_start_node'
    log.config(logBase, 'debug', 'debug')
    StartNodeCrawler().startCrawling()
Exemplo n.º 15
0
def main():
    """Save the in-use accounts and then the invalid accounts of the pool."""
    logBase = GC.LOG_FILE_DIR + 'save_accounts'
    log.config(logBase, 'info', 'info')

    accountPool = createProdRenrenAccountPool()
    # Both savers work on the same pool instance.
    saveInUsingAccounts(accountPool)
    saveInvalidAccount(accountPool)
Exemplo n.º 16
0
def main():
    """Run the read-only data store test and always close the store.

    The store is closed in a ``finally`` clause so that a failing test does
    not leave it open.
    """
    log.config(GC.LOG_FILE_DIR + 'readonlydatastore_test', 'debug', 'debug')
    dataStore = createProdReadOnlyDataStore()
    try:
        test(dataStore)
    finally:
        # Release the store on both the success and the failure path.
        dataStore.close()