def go(self):
	self.log.info("Fetching URLs via local fetcher!")
	for url in self.urls:
		with db.session_context() as sess:
			archiver = SiteArchiver(None, sess, None)
			archiver.synchronousJobRequest(url, ignore_cache=True, debug=True)
def test_retrieve(url, debug=True, rss_debug=False):
	# try:
	# 	WebMirror.SpecialCase.startAmqpFetcher()
	# except RuntimeError:
	# 	# Fetcher already started
	# 	pass

	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	parsed = urllib.parse.urlparse(url)
	root   = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

	# Build a WebPages row purely for debug inspection; the actual fetch is
	# dispatched through SiteArchiver below.
	new = db.WebPages(
		url       = url,
		starturl  = root,
		netloc    = parsed.netloc,
		distance  = 50000,
		is_text   = True,
		priority  = 500000,
		type      = 'unknown',
		fetchtime = datetime.datetime.now(),
	)

	if debug:
		print(new)

	try:
		archiver = SiteArchiver(None, db.get_db_session(), None)
		archiver.synchronousJobRequest(url, ignore_cache=True)
	except Exception:
		traceback.print_exc()
	finally:
		# The session is acquired manually here, so it must be released explicitly.
		db.delete_db_session()
def test():
	print("Test mode!")
	import logSetup
	import settings
	from WebMirror.Engine import SiteArchiver
	logSetup.initLogging()

	urls = [
		'https://royalroadl.com/api/fiction/updates?apiKey=' + settings.RRL_API_KEY,
		# 'https://royalroadl.com/api/fiction/newreleases?apiKey=' + settings.RRL_API_KEY,
	]

	for url in urls:
		with db.session_context() as sess:
			archiver = SiteArchiver(None, sess, None)
			archiver.synchronousJobRequest(url, ignore_cache=True)
def exposed_fetch(url, debug=True, rss_debug=False):
	'''
	Do a synchronous fetch of content from url `url`.
	'''
	# try:
	# 	WebMirror.SpecialCase.startAmqpFetcher()
	# except RuntimeError:
	# 	# Fetcher already started
	# 	pass

	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	parsed = urllib.parse.urlparse(url)
	root = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

	# Build a WebPages row purely for debug inspection; the actual fetch is
	# dispatched through SiteArchiver below.
	new = db.WebPages(
		url=url,
		starturl=root,
		netloc=parsed.netloc,
		distance=50000,
		is_text=True,
		priority=500000,
		type='unknown',
		fetchtime=datetime.datetime.now(),
	)

	if debug:
		print(new)

	try:
		# session_context() handles session acquisition and cleanup.
		with db.session_context() as sess:
			archiver = SiteArchiver(None, sess, None)
			archiver.synchronousJobRequest(url, ignore_cache=True)
	except Exception:
		traceback.print_exc()
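# Usage sketch (illustrative, not part of the original module): calling
# exposed_fetch from an interactive session. The URL is a placeholder, not
# one from this codebase.
#
#     >>> exposed_fetch("https://www.example.org/some-page.html", debug=True)
#
# With rss_debug=True, flags.RSS_DEBUG is set before the fetch, so the RSS
# handling path emits its diagnostic output.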
def fetch(url):
	with db.session_context() as sess:
		archiver = SiteArchiver(cookie_lock=None, db_interface=sess, new_job_queue=None)
		archiver.synchronousJobRequest(url, ignore_cache=True, debug=True)
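# Minimal entry-point sketch (an assumption for ad-hoc testing; this __main__
# guard is illustrative and not part of the original module): fetch a URL
# passed on the command line, or fall back to the canned test() URLs.
if __name__ == '__main__':
	import sys
	if len(sys.argv) > 1:
		fetch(sys.argv[1])
	else:
		test()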