def go(self):
	self.log.info("Fetching URLs via local fetcher!")
	for url in self.urls:
		with db.session_context() as sess:
			archiver = SiteArchiver(None, sess, None)
			archiver.synchronousJobRequest(url, ignore_cache=True, debug=True)
def test_retrieve(url, debug=True, rss_debug=False):
	# try:
	# 	WebMirror.SpecialCase.startAmqpFetcher()
	# except RuntimeError:
	# 	# Fetcher already started
	# 	pass

	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	parsed = urllib.parse.urlparse(url)
	root   = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

	# Build a WebPages row purely for debug inspection; the actual fetch is
	# dispatched through SiteArchiver below.
	new = db.WebPages(
		url       = url,
		starturl  = root,
		netloc    = parsed.netloc,
		distance  = 50000,
		is_text   = True,
		priority  = 500000,
		type      = 'unknown',
		fetchtime = datetime.datetime.now(),
	)

	if debug:
		print(new)

	try:
		archiver = SiteArchiver(None, db.get_db_session(), None)
		archiver.synchronousJobRequest(url, ignore_cache=True)
	except Exception:
		traceback.print_exc()
	finally:
		# The session is acquired manually here, so it must be released explicitly.
		db.delete_db_session()
def test():
	print("Test mode!")
	import logSetup
	import settings
	from WebMirror.Engine import SiteArchiver
	logSetup.initLogging()

	urls = [
		'https://royalroadl.com/api/fiction/updates?apiKey=' + settings.RRL_API_KEY,
		# 'https://royalroadl.com/api/fiction/newreleases?apiKey=' + settings.RRL_API_KEY,
	]

	for url in urls:
		with db.session_context() as sess:
			archiver = SiteArchiver(None, sess, None)
			archiver.synchronousJobRequest(url, ignore_cache=True)
def exposed_fetch(url, debug=True, rss_debug=False):
	'''
	Do a synchronous fetch of content from url `url`.
	'''
	# try:
	# 	WebMirror.SpecialCase.startAmqpFetcher()
	# except RuntimeError:
	# 	# Fetcher already started
	# 	pass

	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	parsed = urllib.parse.urlparse(url)
	root = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

	# Build a WebPages row purely for debug inspection; the actual fetch is
	# dispatched through SiteArchiver below.
	new = db.WebPages(
		url=url,
		starturl=root,
		netloc=parsed.netloc,
		distance=50000,
		is_text=True,
		priority=500000,
		type='unknown',
		fetchtime=datetime.datetime.now(),
	)

	if debug:
		print(new)

	try:
		# session_context() handles session acquisition and cleanup.
		with db.session_context() as sess:
			archiver = SiteArchiver(None, sess, None)
			archiver.synchronousJobRequest(url, ignore_cache=True)
	except Exception:
		traceback.print_exc()
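# Usage sketch (illustrative, not part of the original module): calling
# exposed_fetch from an interactive session. The URL is a placeholder, not
# one from this codebase.
#
#     >>> exposed_fetch("https://www.example.org/some-page.html", debug=True)
#
# With rss_debug=True, flags.RSS_DEBUG is set before the fetch, so the RSS
# handling path emits its diagnostic output.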
def fetch(url):
	with db.session_context() as sess:
		archiver = SiteArchiver(cookie_lock=None, db_interface=sess, new_job_queue=None)
		archiver.synchronousJobRequest(url, ignore_cache=True, debug=True)
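# Minimal entry-point sketch (an assumption for ad-hoc testing; this __main__
# guard is illustrative and not part of the original module): fetch a URL
# passed on the command line, or fall back to the canned test() URLs.
if __name__ == '__main__':
	import sys
	if len(sys.argv) > 1:
		fetch(sys.argv[1])
	else:
		test()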