def go(self):
	self.log.info("Fetching URLs via local fetcher!")

	for url in self.urls:
		with db.session_context() as sess:
			archiver = SiteArchiver(None, sess, None)
			archiver.synchronousJobRequest(url, ignore_cache=True, debug=True)
Example #2
def test_retrieve(url, debug=True, rss_debug=False):

	# try:
	# 	WebMirror.SpecialCase.startAmqpFetcher()
	# except RuntimeError:  # Fetcher already started
	# 	pass

	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	parsed = urllib.parse.urlparse(url)
	root = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

	# Build a WebPages record describing the fetch target; it is only printed
	# when debug=True and is not inserted into the database here.
	new = db.WebPages(
		url       = url,
		starturl  = root,
		netloc    = parsed.netloc,
		distance  = 50000,
		is_text   = True,
		priority  = 500000,
		type      = 'unknown',
		fetchtime = datetime.datetime.now(),
		)

	if debug:
		print(new)

	try:
		archiver = SiteArchiver(None, db.get_db_session(), None)
		archiver.synchronousJobRequest(url, ignore_cache=True)
	except Exception:
		traceback.print_exc()
	finally:
		db.delete_db_session()
def test():
    print("Test mode!")
    import logSetup
    import settings
    from WebMirror.Engine import SiteArchiver

    logSetup.initLogging()

    urls = [
        'https://royalroadl.com/api/fiction/updates?apiKey=' +
        settings.RRL_API_KEY,
        # 'https://royalroadl.com/api/fiction/newreleases?apiKey=' + settings.RRL_API_KEY,
    ]

    for url in urls:
        with db.session_context() as sess:
            archiver = SiteArchiver(None, sess, None)
            archiver.synchronousJobRequest(url, ignore_cache=True)
def exposed_fetch(url, debug=True, rss_debug=False):
    '''
    Do a synchronous fetch of content from url `url`.
    '''

    # try:
    # 	WebMirror.SpecialCase.startAmqpFetcher()
    # except RuntimeError:  # Fetcher already started
    # 	pass

    if rss_debug:
        print("Debugging RSS")
        flags.RSS_DEBUG = True

    parsed = urllib.parse.urlparse(url)
    root = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

    new = db.WebPages(
        url=url,
        starturl=root,
        netloc=parsed.netloc,
        distance=50000,
        is_text=True,
        priority=500000,
        type='unknown',
        fetchtime=datetime.datetime.now(),
    )

    if debug:
        print(new)

    try:
        with db.session_context() as sess:
            archiver = SiteArchiver(None, sess, None)
            archiver.synchronousJobRequest(url, ignore_cache=True)
    except Exception:
        traceback.print_exc()
def exposed_fetch(url, debug=True, rss_debug=False):
	'''
	Do a synchronous fetch of content from url `url`.
	'''

	# try:
	# 	WebMirror.SpecialCase.startAmqpFetcher()
	# except RuntimeError:  # Fetcher already started
	# 	pass

	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	parsed = urllib.parse.urlparse(url)
	root = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

	new = db.WebPages(
		url       = url,
		starturl  = root,
		netloc    = parsed.netloc,
		distance  = 50000,
		is_text   = True,
		priority  = 500000,
		type      = 'unknown',
		fetchtime = datetime.datetime.now(),
		)

	if debug:
		print(new)

	try:
		archiver = SiteArchiver(None, db.get_db_session(), None)
		archiver.synchronousJobRequest(url, ignore_cache=True)
	except Exception:
		traceback.print_exc()
	finally:
		db.delete_db_session()
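
A minimal usage sketch for the synchronous fetch helper above; the call form follows the function's signature, and the target URL is illustrative only:

if __name__ == '__main__':
	# Fetch one page synchronously; the constructed WebPages row is printed
	# first because debug defaults to True.
	exposed_fetch('https://www.example.org/some-page', rss_debug=False)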
Example #6
def fetch(url):
    with db.session_context() as sess:
        archiver = SiteArchiver(cookie_lock=None,
                                db_interface=sess,
                                new_job_queue=None)
        archiver.synchronousJobRequest(url, ignore_cache=True, debug=True)
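
All of these snippets omit their module-level imports. A rough sketch of the header they assume, based only on the names used above: the standard-library imports and `from WebMirror.Engine import SiteArchiver` are as given (the latter is shown in `test()`), `logSetup` and `settings` are imported by bare name in `test()`, while the import paths for the project's `db` and `flags` modules depend on the repository layout, so the bare names below are placeholders.

import datetime
import traceback
import urllib.parse

from WebMirror.Engine import SiteArchiver   # as imported in test() above

import logSetup    # project logging bootstrap, used by test()
import settings    # project settings; provides RRL_API_KEY
import flags       # placeholder path for the project's flags module
import db          # placeholder path for the project's database layer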