Exemplo n.º 1
0
class Craw(object):

    def __init__(self, n):
        '''
        will download n * categories * subcategories
        now, subcategories is 2, and categories is 27, then total numbers is n * 2 * 27 = n * 54
        @n: download the numbers of app in one subcatorgories
        '''
        self.n = n
        self._api = CrawlAPI()
        self._q = Queue.Queue()
        self._endProcess = False

    def crawlAppInfo(self):
        print 'start crawl app info ...'
        subcategory = ['apps_topselling_free', 'apps_topselling_new_free']
        categories = self._api.getCategories()
        for title, TAG in categories.iteritems():
            #print TAG
            for item in subcategory:
                print '----------------------------------------------'
                print 'update [%s] [%s]'%(TAG, item)
                print '----------------------------------------------'
                res = self._api.getApps(TAG, item, self.n)
                if res != {}:
                    for title, packageName in res.iteritems():
                        self._q.put((title, packageName))
                    time.sleep(random.randint(1, 5))
        print 'crawl info end ...'

    def downloadApp(self, i):
        while (not self._endProcess) or (not self._q.empty()):
            (appName, packageName) = self._q.get()
            print '----------------------------------------------'
            print 'download [%s] [%s]'%(appName, packageName)
            print '----------------------------------------------'
            self._api.download(packageName)
            if self._q.empty():
                time.sleep(5)
        print 'threading [%s] end ...'%i

    def start(self):
        '''
        start to crawl and download
        '''
        threads = []
        for i in range(THREADING_NUM):
            print 'create download threading ... %s'%i
            thread = threading.Thread(target=self.downloadApp, args=(i,))
            thread.start()
            threads.append(thread)
        self.crawlAppInfo()
        self._endProcess = True
        for thread in threads:
            if thread.is_alive:
                thread.join()
Exemplo n.º 2
0
 def __init__(self, n):
     '''
     will download n * categories * subcategories
     now, subcategories is 2, and categories is 27, then total numbers is n * 2 * 27 = n * 54
     @n: download the numbers of app in one subcatorgories
     '''
     self.n = n
     self._api = CrawlAPI()
     self._q = Queue.Queue()
     self._endProcess = False
Exemplo n.º 3
0
class Craw(object):
    def __init__(self, n):
        '''
        will download n * categories * subcategories
        now, subcategories is 2, and categories is 27, then total numbers is n * 2 * 27 = n * 54
        @n: download the numbers of app in one subcatorgories
        '''
        self.n = n
        self._api = CrawlAPI()
        self._q = Queue.Queue()
        self._endProcess = False

    def crawlAppInfo(self):
        print 'start crawl app info ...'
        subcategory = ['apps_topselling_free', 'apps_topselling_new_free']
        categories = self._api.getCategories()
        for title, TAG in categories.iteritems():
            #print TAG
            for item in subcategory:
                print '----------------------------------------------'
                print 'update [%s] [%s]' % (TAG, item)
                print '----------------------------------------------'
                res = self._api.getApps(TAG, item, self.n)
                if res != {}:
                    for title, packageName in res.iteritems():
                        self._q.put((title, packageName))
                    time.sleep(random.randint(1, 5))
        print 'crawl info end ...'

    def downloadApp(self, i):
        while (not self._endProcess) or (not self._q.empty()):
            (appName, packageName) = self._q.get()
            print '----------------------------------------------'
            print 'download [%s] [%s]' % (appName, packageName)
            print '----------------------------------------------'
            self._api.download(packageName)
            if self._q.empty():
                time.sleep(5)
        print 'threading [%s] end ...' % i

    def start(self):
        '''
        start to crawl and download
        '''
        threads = []
        for i in range(THREADING_NUM):
            print 'create download threading ... %s' % i
            thread = threading.Thread(target=self.downloadApp, args=(i, ))
            thread.start()
            threads.append(thread)
        self.crawlAppInfo()
        self._endProcess = True
        for thread in threads:
            if thread.is_alive:
                thread.join()
Exemplo n.º 4
0
 def __init__(self, n):
     '''
     will download n * categories * subcategories
     now, subcategories is 2, and categories is 27, then total numbers is n * 2 * 27 = n * 54
     @n: download the numbers of app in one subcatorgories
     '''
     self.n = n
     self._api = CrawlAPI()
     self._q = Queue.Queue()
     self._endProcess = False