示例#1
0
def test(url, debug=True):

	parsed = urllib.parse.urlparse(url)
	root = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

	new = db.WebPages(
		url       = url,
		starturl  = root,
		netloc    = parsed.netloc,
		distance  = 50000,
		is_text   = True,
		priority  = 500000,
		type      = 'unknown',
		fetchtime = datetime.datetime.now(),
		)

	if debug:
		print(new)
	archiver = SiteArchiver(None)
	ret = archiver.taskProcess(job_test=new)

	if debug:
		print(archiver)
		print(ret.keys())

		if "plainLinks" in ret and "rsrcLinks" in ret: # Looks like a HTML page. Print the relevant info
			print_html_response(archiver, new, ret)
		if "rss-content" in ret:
			print_rss_response(archiver, new, ret)


	pass
示例#2
0
def test(url, debug=True, rss_debug=False):
	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	parsed = urllib.parse.urlparse(url)
	root = urllib.parse.urlunparse((parsed[0], parsed[1], "", "", "", ""))

	new = db.WebPages(
		url       = url,
		starturl  = root,
		netloc    = parsed.netloc,
		distance  = 50000,
		is_text   = True,
		priority  = 500000,
		type      = 'unknown',
		fetchtime = datetime.datetime.now(),
		)

	if debug:
		print(new)
	archiver = SiteArchiver(None)
	archiver.taskProcess(job_test=new)