Example #1
    def crawl(self, trackingTimestamp):

        config = Configuration.Configuration.readFromFile()
        countLimit = 65535 if config.maxFetchCount == -1 else config.maxFetchCount
        urlsToFetch = self.fetchURL(trackingTimestamp, countLimit)
        if len(urlsToFetch) == 0:
            print 'No URL to fetch.'
            return
        for url in urlsToFetch:
            print 'URL to fetch: ' + str(url)
            fetcher = Fetcher1.Fetcher1()
            html = fetcher.fetch(url.url, config)

            parser = Parser1.Parser1()
            parseResult = parser.parse(html, url.url)

            if parseResult.content is not None:
                try:
                    CRM.saveEnterprise(parseResult.content)
                except Exception:
                    print traceback.format_exc()

            tracker = Tracker1.Tracker1()
            # find("/", 7) skips past "http://" (7 characters) to the first
            # path slash, keeping scheme + host; this breaks for https URLs.
            basePath = url.url[:url.url.find("/", 7)]
            tracker.updateTrackTime(url.id)
            tracker.track(parseResult.newSeeds, url.id, self.id, basePath)

            print 'Sleep ' + str(config.interval) + ' seconds.'
            time.sleep(config.interval)
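The basePath slice above only works for plain http:// URLs, since find("/", 7) hard-codes the 7-character "http://" prefix. A minimal sketch of a sturdier alternative, using the Python 2 stdlib urlparse module to match the snippet's Python 2 style (the helper name base_path is an assumption, not part of the original code):

    from urlparse import urlparse

    def base_path(url):
        # Keep only the scheme and host, e.g.
        # 'https://example.com/a/b?q=1' -> 'https://example.com'
        parts = urlparse(url)
        return parts.scheme + '://' + parts.netloc

    print base_path('https://example.com/a/b?q=1')  # https://example.com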
Example #2
    def crawl(self, trackingTimestamp, keyword = None):

        config = Configuration.Configuration.readFromFile()
        countLimit = 65535 if config.maxFetchCount == -1 else config.maxFetchCount
        urlsToFetch = self.fetchURL(trackingTimestamp, countLimit)
        if len(urlsToFetch) == 0:
            print 'No URL to fetch.'
            return
        fetcher = Fetcher2.Fetcher2()
        parser = Parser2.Parser2()
        count = 0
        tracker = Tracker2.Tracker2()
        for url in urlsToFetch:
            if count >= countLimit:
                print 'Fetch count limit reached: ' + str(countLimit)
                break
            count += 1
            print 'URL to fetch: ' + str(url)
            html = fetcher.fetch(url.url, config)

            if parser.needLogin(html):
                print 'Need to Login'
                html = fetcher.login(self.username, self.password)
                if parser.needLogin(html):
                    raise Exception("Login failed!")
                print 'Login success!'
                html = fetcher.fetch(url.url, config)

            if parser.isDetailPage(html):
                parseResult = parser.parse(html, url.url, config)
                if parseResult.content is not None:
                    try:
                        CRM.saveEnterprise(parseResult.content)
                    except Exception:
                        print traceback.format_exc()
                    tracker.updateTrackTime(url.id)
                    tracker.track(parseResult.newSeeds, url.id, self.id, None)
            elif keyword is not None:
                print 'Search term: ' + keyword
                html = fetcher.search(keyword)
                tracker.updateTrackTime(url.id)
                page = 1
                while True:
                    parseSearchResult = parser.parseSearchResult(html)
                    tracker.track(parseSearchResult.newSeeds, url.id, self.id, None)
                    if parseSearchResult.newSeedRightNow is None or count >= countLimit:
                        print 'parseSearchResult.newSeedRightNow is None: ' + str(parseSearchResult.newSeedRightNow is None)
                        print 'count >= countLimit: ' + str(count >= countLimit)
                        break
                    page += 1
                    print 'Will crawl page ' +  str(page) + ': ' + parseSearchResult.newSeedRightNow['href']
                    print 'Sleep ' + str(config.interval) + ' seconds.'
                    time.sleep(config.interval)
                    html = fetcher.fetch(parseSearchResult.newSeedRightNow['href'], config)
                    if html is None:
                        retryTimes = 0
                        while retryTimes < config.maxRetryTimes and html is None:
                            retryTimes += 1
                            print 'Retry ' + str(retryTimes)
                            html = fetcher.fetch(parseSearchResult.newSeedRightNow['href'], config)
                        if html is None:
                            # Every retry failed; stop paging this search
                            # instead of parsing None on the next pass.
                            break
                    count += 1

            print 'Sleep ' + str(config.interval) + ' seconds.'
            time.sleep(config.interval)
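The inline retry loop above is a pattern worth factoring out. A minimal sketch under the same assumptions as Example #2 (fetcher.fetch and config.maxRetryTimes behave as shown above; the helper name fetch_with_retry is hypothetical):

    def fetch_with_retry(fetcher, url, config):
        # One initial attempt, then up to config.maxRetryTimes retries
        # while the fetch keeps returning None.
        html = fetcher.fetch(url, config)
        retryTimes = 0
        while html is None and retryTimes < config.maxRetryTimes:
            retryTimes += 1
            print 'Retry ' + str(retryTimes)
            html = fetcher.fetch(url, config)
        return html  # may still be None if every attempt failed

Callers still have to handle a None result, which is what the break added to the paging loop above does.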
Example #3
    def testGetCountryId(self):
        country_id = CRM.getCountryId('China')
        print 'Country Id: ' + str(country_id)
        self.assertTrue(country_id > 0)
Example #4
    def testSaveEnterprise(self):
        enterprise = Enterprise('testSaveEnterprise', 'admin', '*****@*****.**', '123456', '234567', '345678', 'zhyfoundry-spider', 'remark', 'keyword', 'China')
        CRM.saveEnterprise(enterprise)
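Examples #3 and #4 read like unittest.TestCase methods. A minimal harness they could sit in, assuming the CRM module shown above is importable (the class name CRMTest is an assumption):

    import unittest
    import CRM

    class CRMTest(unittest.TestCase):
        def testGetCountryId(self):
            # getCountryId is expected to return a positive id for a
            # known country name.
            country_id = CRM.getCountryId('China')
            self.assertTrue(country_id > 0)

    if __name__ == '__main__':
        unittest.main()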