def runf(self, url, debug=False):
    """Fetch the result pages of one listing URL and collect what they yield.

    Pages ``url&page=1`` through ``url&page=499`` are requested in order.
    Whatever the parser returns for a page is appended to
    ``self.state.resultlist`` while holding ``self.state.resultlist_lock``.
    Paging stops at the first page that yields fewer than 50 results, or at
    the first page whose fetch or parse raises.

    Original note: the job is given and task_done will be called, but this
    method still needs to handle the various locks itself.

    Args:
        url: Listing URL; ``&page=<n>`` is appended for every request.
        debug: Not used by this method; kept for interface compatibility.
    """
    try:
        for page_no in range(1, 500):
            pgurl = url + '&page=%d' % page_no
            try:
                page = urlutil.getpage_unicode(pgurl)
                res = myparser.parse_appstore_catpage_applink_fetcher(page)
                with self.state.resultlist_lock:
                    self.state.resultlist += res
                # Presumably a full page holds 50 entries, so a shorter one
                # is treated as the last page.
                if len(res) < 50:
                    break
            except Exception as e:
                redprint('broken at page %d, excep msg is %s' % (page_no, str(e)))
                break
    # NOTE(review): the inner handler already catches every Exception
    # subclass raised while fetching/parsing, so the handlers below only run
    # for errors that escape it (e.g. raised while reporting the failure).
    except config.IDError as e:
        print("what is this id error ?")
        print(str(e))
        # Record the failing job itself; the previous code passed the
        # builtin ``id`` function because no ``id`` variable exists here.
        add_to_faillist(url)
    except IndexError:
        yellowprint('index error happends (app no dev?), id is %s' % url)
        add_to_faillist(url)
        self.state.faillist.append(url)
# Crawl every category listing, letter by letter and page by page, then
# pickle the combined results to 'appstore_linkres.pickle'.
with open('ios-home-res.info') as f:
    li = f.readlines()

# The category URL is the second whitespace-separated field of each line.
# Lines with fewer than two fields (e.g. a trailing blank line) carry no URL
# and are skipped instead of raising IndexError.
urlli = [
    fields[1]
    for fields in (line.split() for line in li)
    if len(fields) > 1
]

con = []
for cat_idx, caturl in enumerate(urlli):
    blueprint('done with cat %d' % cat_idx)
    # A separate index name keeps the category counter from being shadowed.
    for letter_idx, lturl in enumerate(makeurls_with_letters(caturl)):
        greenprint('done with letter %d' % letter_idx)
        yellowprint('collected ids %d' % len(con))
        for page_no in range(1, 500):
            print('processing page %d' % page_no)
            pgurl = lturl + '&page=%d' % page_no
            try:
                page = urlutil.getpage_unicode(pgurl)
                # NOTE(review): called unqualified here, while runf() uses
                # myparser.parse_appstore_catpage_applink_fetcher; confirm
                # the bare name is actually in scope.
                res = parse_appstore_catpage_applink_fetcher(page)
                con += res
                # Presumably a full page holds 50 entries, so a shorter one
                # is treated as the last page for this letter.
                if len(res) < 50:
                    break
            except Exception as e:
                redprint('broken at page %d, excep msg is %s' % (page_no, str(e)))
                break

with open('appstore_linkres.pickle', 'wb') as f:
    pickle.dump(con, f)

if __name__ == '__main__':
    # Fetch and parse a single app page; the result is not used further here.
    page = urlutil.getpage_unicode('https://itunes.apple.com/us/app/kfz-abkurzungen/id463803338?mt=8')
    res = myparser.parse_appstore_apppage(page)