.format(node, memory_bytes / 1024. / 1024., settings.RABBITMQ_NODE_MEMORY_ALERT / 1024. / 1024.))  # NOTE(review): tail of a statement started above this chunk; the divisions convert bytes -> MB for display

# Module-level logger handle.  Stays None until check_rabbitmq_memory() has
# configured logging; assigned there via `global`.
memory_logger = None


@baker.command
def check_rabbitmq_memory():
    """Run the RabbitMQ memory check forever.

    Configures Django-style logging first (so failures reach the handlers
    declared in settings.LOGGING -- per the original comment, that includes a
    Sentry handler), then loops: run the check, log any failure, sleep
    RABBITMQ_SLEEP, repeat.  Never returns.
    """
    # Configure logging -- using the implicit Sentry handler there
    from django.utils.dictconfig import dictConfig
    dictConfig(settings.LOGGING)
    global memory_logger
    memory_logger = logging.getLogger(
        'servermonitoring.healthchecks.check_rabbitmq_memory')
    while True:
        try:
            run_rabbitmq_check()
        except:
            # NOTE(review): bare `except` also traps KeyboardInterrupt and
            # SystemExit, making the loop hard to stop cleanly; consider
            # `except Exception:` instead.
            memory_logger.exception('RabbitMQ memory check failed')
        # Sleep happens every iteration, success or failure.
        memory_logger.info('Sleeping for %s', RABBITMQ_SLEEP)
        time.sleep(RABBITMQ_SLEEP.total_seconds())


if __name__ == '__main__':
    utils.log_to_stderr()
    baker.run()
return res  # NOTE(review): tail of a function whose `def` is above this chunk


@baker.command
def eetester_qa(inf_count, procs):
    """Run the email-extractor tester over rows built from `inf_count`
    validated influencers, using `procs` worker processes.

    Both arguments arrive as strings from the baker CLI, hence the int()
    conversions.
    """
    rows = rows_from_validated_influencers(int(inf_count))
    eet = EETester(rows, emailextractor.extract_emails_from_platform, True)
    log.info('Processing %d rows with %d processes', len(rows), int(procs))
    eet.test(int(procs))


@baker.command
def eetester_inf(blog_url):
    """Run the email-extractor tester for a single influencer matched by
    blog-url prefix.

    NOTE(review): uses the first match only; raises IndexError if no
    influencer's blog_url starts with `blog_url`.
    """
    infs = models.Influencer.objects.filter(blog_url__startswith=blog_url)
    log.info('Found infs: %s', list(infs))
    eet = EETester([row_from_influencer(infs[0])],
                   emailextractor.extract_emails_from_platform, True)
    eet.test(1)


@baker.command
def divide_log_by_thread_ident(filename='eetester.log', prefix='THR:'):
    """Split a log file into per-thread pieces; thin wrapper around
    petester.divide_log_by_thread_ident."""
    from . import petester
    petester.divide_log_by_thread_ident(filename, prefix)


if __name__ == '__main__':
    utils.log_to_stderr(thread_id=True)
    baker.run()
""") bad_post_urls = {r[0] for r in cur} log.info('Got %d bad urls', len(bad_post_urls)) infs = debra.models.Influencer.objects.filter(show_on_search=True).order_by('id') count = infs.count() num_workers = 10 slice_val = count/num_workers for inf in infs[slot*slice_val:(slot+1)*slice_val]: log.info('Processing influencer %r', inf) all_posts = inf.posts_set.all() for post in all_posts.iterator(): try: content = platformutils.iterate_resolve_shortened_urls(post.content) all_urls = contentfiltering.find_all_urls(content) log.info('Urls in post %r: %r', post, all_urls) for url in all_urls: url = utils.remove_query_params(url) if url in bad_post_urls: log.warn('Bad url: %r', url) post.brandinpost_set.all().delete() post.products_import_completed = False post.save() except: log.exception('While processing %r', post) if __name__ == '__main__': utils.log_to_stderr(['__main__', 'platformdatafetcher', 'xps', 'xpathscraper', 'requests']) baker.run()
@baker.command
def print_navigation_links(url):
    """Load `url` in a visible browser and print the hrefs of detected
    navigation-link clusters, one group per cluster separated by a blank line.

    headless_display=False and disable_cleanup=True keep the browser window
    open for interactive inspection -- this is a debugging command.
    """
    from xpathscraper import xbrowser as xbrowsermod
    with xbrowsermod.XBrowser(headless_display=False, disable_cleanup=True) as xb:
        xb.load_url(url)
        clusters = find_navigation_links_clusters(xb)
        for cluster in clusters:
            # Parenthesized print is identical under Python 2 and valid under
            # Python 3 (was the py2-only statement form).
            print('\n')
            for el in cluster:
                print(el.get_attribute('href'))


def resolve_redirect_using_xbrowser(url, to_sleep=5):
    """Resolve `url` to its final destination by loading it in a real browser.

    Sleeps `to_sleep` seconds after the load so client-side (JS/meta-refresh)
    redirects can run, then returns the browser's current URL.  Best-effort:
    on any failure the error is logged and the original `url` is returned.
    """
    from xpathscraper import xbrowser as xbrowsermod
    try:
        with xbrowsermod.XBrowser(
                headless_display=settings.AUTOCREATE_HEADLESS_DISPLAY) as xb:
            xb.load_url(url)
            time.sleep(to_sleep)
            return xb.driver.current_url
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; `Exception` keeps the best-effort fallback without
        # masking interpreter-shutdown signals.
        log.exception('While resolve_redirect_using_xbrowser for %r', url)
        return url


if __name__ == '__main__':
    utils.log_to_stderr(['__main__', 'xpathscraper', 'requests'])
    baker.run()