示例#1
0
	def doCall(self):
		'''
		Run this job once, tracking its run-state in the `PluginStatus` row whose
		`plugin_name` equals `self.job_name`.

		If that row is already flagged `is_running`, an error is logged and the
		call returns without running anything. Otherwise the row is flagged as
		running (with `last_run` set to now), `self._doCall()` is invoked, and
		afterwards the row is flagged as not running with `last_run_end` set.

		Any `Exception` raised by `self._doCall()` is recorded on the row
		(`last_error`, `last_error_msg`) and then re-raised to the caller.
		The status lookup uses `.one()`, so it raises if there is not exactly
		one matching row.
		'''

		self.log.info("Calling job %s", self.job_name)
		session = db.get_db_session()

		# Outer try/finally: db.delete_db_session() is called on every exit path,
		# including the "already running" early return and a failed status lookup,
		# which previously skipped it.
		try:
			item = session.query(db.PluginStatus).filter(db.PluginStatus.plugin_name==self.job_name).one()
			if item.is_running:
				session.commit()
				self.log.error("Plugin %s is already running! Not doing re-entrant call!", self.job_name)
				return

			item.is_running = True
			item.last_run = datetime.datetime.now()
			session.commit()

			try:
				self._doCall()
			except Exception:
				# Not committed here; the commit in the `finally` below runs
				# before the exception propagates.
				item.last_error      = datetime.datetime.now()
				item.last_error_msg  = traceback.format_exc()
				raise
			finally:
				# Re-query the row before clearing the running flag.
				item2 = session.query(db.PluginStatus).filter(db.PluginStatus.plugin_name==self.job_name).one()
				item2.is_running = False
				item2.last_run_end = datetime.datetime.now()
				session.commit()
		finally:
			db.delete_db_session()

		self.log.info("Job %s complete.", self.job_name)
def resetRunStates():
	'''
	Set `is_running` to False on every `PluginStatus` row, commit, and then
	call `db.delete_db_session()`. Progress is reported via `print`.
	'''
	print("JobSetup call resetting run-states!")

	sess = db.get_db_session()

	# Unfiltered query: the bulk update applies to all plugin-status rows.
	all_plugins = sess.query(db.PluginStatus)
	all_plugins.update({db.PluginStatus.is_running : False})
	sess.commit()

	db.delete_db_session()
	print("Run-states reset.")
	def doCall(self):
		'''
		Run this job once, tracking its run-state in the `PluginStatus` row whose
		`plugin_name` equals `self.job_name`.

		If that row is already flagged `is_running`, an error is logged and the
		call returns without running anything. Otherwise the row is flagged as
		running (with `last_run` set to now), `self._doCall()` is invoked, and
		afterwards the row is flagged as not running with `last_run_end` set.

		Any `Exception` raised by `self._doCall()` is recorded on the row
		(`last_error`, `last_error_msg`) and then re-raised to the caller.
		The status lookup uses `.one()`, so it raises if there is not exactly
		one matching row.
		'''

		self.log.info("Calling job %s", self.job_name)
		session = db.get_db_session()

		# Outer try/finally: db.delete_db_session() is called on every exit path,
		# including the "already running" early return and a failed status lookup,
		# which previously skipped it.
		try:
			item = session.query(db.PluginStatus).filter(db.PluginStatus.plugin_name==self.job_name).one()
			if item.is_running:
				session.commit()
				self.log.error("Plugin %s is already running! Not doing re-entrant call!", self.job_name)
				return

			item.is_running = True
			item.last_run = datetime.datetime.now()
			session.commit()

			try:
				self._doCall()
			except Exception:
				# Not committed here; the commit in the `finally` below runs
				# before the exception propagates.
				item.last_error      = datetime.datetime.now()
				item.last_error_msg  = traceback.format_exc()
				raise
			finally:
				# Re-query the row before clearing the running flag.
				item2 = session.query(db.PluginStatus).filter(db.PluginStatus.plugin_name==self.job_name).one()
				item2.is_running = False
				item2.last_run_end = datetime.datetime.now()
				session.commit()
		finally:
			db.delete_db_session()

		self.log.info("Job %s complete.", self.job_name)
示例#4
0
def initializeStartUrls(rules):
	'''
	Make sure every start URL listed in `rules` has a row in `WebPages`.

	`rules` is iterated as a collection of rulesets, each indexed by the keys
	'starturls', 'type' and 'normal_fetch_mode'. Rulesets whose 'starturls'
	value is falsy are skipped. For each start URL with no existing
	`WebPages` row (matched on `url`), a new row is added with idle priority
	and default distance.

	The session is committed after every URL; a failed commit is reported via
	`print` and rolled back, and processing continues with the next URL.
	'''
	print("Initializing all start URLs in the database")
	sess = db.get_db_session()

	# try/finally so the session is closed and released even if a query fails.
	try:
		for ruleset in [rset for rset in rules if rset['starturls']]:
			for starturl in ruleset['starturls']:
				have = sess.query(db.WebPages) \
					.filter(db.WebPages.url == starturl)   \
					.count()
				if not have:
					netloc = urlFuncs.getNetLoc(starturl)
					new = db.WebPages(
							url               = starturl,
							starturl          = starturl,
							netloc            = netloc,
							type              = ruleset['type'],
							priority          = db.DB_IDLE_PRIORITY,
							distance          = db.DB_DEFAULT_DIST,
							normal_fetch_mode = ruleset['normal_fetch_mode'],
						)
					print("Missing start-url for address: '{}'".format(starturl))
					sess.add(new)
				try:
					sess.commit()
				# SQLAlchemyError lives in `sqlalchemy.exc`; the top-level package
				# does not export it, so `sqlalchemy.SQLAlchemyError` would itself
				# raise AttributeError the moment a commit failed.
				except sqlalchemy.exc.SQLAlchemyError:
					print("Failure inserting start url for address: '{}'".format(starturl))

					sess.rollback()
	finally:
		sess.close()
		db.delete_db_session()
示例#5
0
def resetRunStates():
	'''
	Set `is_running` to False on every `PluginStatus` row, commit, and then
	call `db.delete_db_session()`. Progress is reported via `print`.
	'''
	print("JobSetup call resetting run-states!")

	sess = db.get_db_session()

	# Unfiltered query: the bulk update applies to all plugin-status rows.
	all_plugins = sess.query(db.PluginStatus)
	all_plugins.update({db.PluginStatus.is_running : False})
	sess.commit()

	db.delete_db_session()
	print("Run-states reset.")
示例#6
0
def initializeStartUrls(rules):
	'''
	Make sure every start URL listed in `rules` has a row in `WebPages`.

	`rules` is iterated as a collection of rulesets, each indexed by the keys
	'starturls', 'type' and 'normal_fetch_mode'. Rulesets whose 'starturls'
	value is falsy are skipped. For each start URL with no existing
	`WebPages` row (matched on `url`), a new row is added with idle priority
	and default distance.

	The session is committed after every URL; a failed commit is reported via
	`print` and rolled back, and processing continues with the next URL.
	'''
	print("Initializing all start URLs in the database")
	sess = db.get_db_session()

	# try/finally so the session is closed and released even if a query fails.
	try:
		for ruleset in [rset for rset in rules if rset['starturls']]:
			for starturl in ruleset['starturls']:
				have = sess.query(db.WebPages) \
					.filter(db.WebPages.url == starturl)   \
					.count()
				if not have:
					netloc = urlFuncs.getNetLoc(starturl)
					new = db.WebPages(
							url               = starturl,
							starturl          = starturl,
							netloc            = netloc,
							type              = ruleset['type'],
							priority          = db.DB_IDLE_PRIORITY,
							distance          = db.DB_DEFAULT_DIST,
							normal_fetch_mode = ruleset['normal_fetch_mode'],
						)
					print("Missing start-url for address: '{}'".format(starturl))
					sess.add(new)
				try:
					sess.commit()
				# SQLAlchemyError lives in `sqlalchemy.exc`; the top-level package
				# does not export it, so `sqlalchemy.SQLAlchemyError` would itself
				# raise AttributeError the moment a commit failed.
				except sqlalchemy.exc.SQLAlchemyError:
					print("Failure inserting start url for address: '{}'".format(starturl))

					sess.rollback()
	finally:
		sess.close()
		db.delete_db_session()
示例#7
0
def teardown_request(response):
    """
    Commit the session stored on `g`, rolling back if the commit fails, then
    call `database.delete_db_session(flask_sess_if_possible=False)`.

    `response` is accepted but not used. No exception escapes: any failure
    in the steps above is reported with `print` plus a printed traceback.
    """
    try:
        session = g.session
        try:
            session.commit()
        except Exception:
            # A failed commit is not reported; the session is just rolled back.
            session.rollback()

        print("Returned session")

        database.delete_db_session(flask_sess_if_possible=False)
    except Exception:
        print("Failure in teardown_request()!")
        traceback.print_exc()
示例#8
0
	def do_task(self):
		'''
		Build a fresh `SiteArchiver` on a DB session, store it on
		`self.archiver`, and run one `taskProcess()` call.

		Returns whatever `taskProcess()` returns. Exceptions propagate to the
		caller; in every case the session has `expunge_all()` called on it and
		`db.delete_db_session()` is invoked before leaving.
		'''

		session = db.get_db_session()

		try:
			self.archiver = WebMirror.Engine.SiteArchiver(
				self.cookie_lock,
				new_job_queue  = self.new_job_queue,
				response_queue = self.resp_queue,
				db_interface   = session,
			)
			return self.archiver.taskProcess()
		finally:
			# Clear out the sqlalchemy state
			session.expunge_all()
			db.delete_db_session()
示例#9
0
	def do_task(self):
		'''
		Build a fresh `SiteArchiver` on a DB session, store it on
		`self.archiver`, and run one `taskProcess()` call.

		Returns whatever `taskProcess()` returns. Exceptions propagate to the
		caller; in every case the session has `expunge_all()` called on it and
		`db.delete_db_session()` is invoked before leaving.
		'''

		session = db.get_db_session()

		try:
			self.archiver = WebMirror.Engine.SiteArchiver(
				self.cookie_lock,
				new_job_queue  = self.new_job_queue,
				response_queue = self.resp_queue,
				db_interface   = session,
			)
			return self.archiver.taskProcess()
		finally:
			# Clear out the sqlalchemy state
			session.expunge_all()
			db.delete_db_session()
示例#10
0
def resetInProgress():
	'''
	Set every `WebPages` row whose state is "fetching", "processing",
	"specialty_deferred" or "specialty_ready" back to "new", then commit,
	close the session and call `db.delete_db_session()`.
	'''
	print("Resetting any stalled downloads from the previous session.")

	session = db.get_db_session()

	state = db.WebPages.state
	# Rows matching any of these intermediate states are reset.
	stalled = (
		(state == "fetching")           |
		(state == "processing")         |
		(state == "specialty_deferred") |
		(state == "specialty_ready")
	)
	session.query(db.WebPages).filter(stalled).update({state : "new"})

	session.commit()
	session.close()
	db.delete_db_session()
示例#11
0
def resetInProgress():
	'''
	Set every `WebPages` row whose state is "fetching", "processing",
	"specialty_deferred" or "specialty_ready" back to "new", then commit,
	close the session and call `db.delete_db_session()`.
	'''
	print("Resetting any stalled downloads from the previous session.")

	session = db.get_db_session()

	state = db.WebPages.state
	# Rows matching any of these intermediate states are reset.
	stalled = (
		(state == "fetching")           |
		(state == "processing")         |
		(state == "specialty_deferred") |
		(state == "specialty_ready")
	)
	session.query(db.WebPages).filter(stalled).update({state : "new"})

	session.commit()
	session.close()
	db.delete_db_session()
示例#12
0
    def get_times(self):
        """
        Read every `job_state` value from the `apscheduler_jobs` table and
        return a packed "system-update-times" message.

        The message payload is a dict with one key, "update-times", holding a
        list of `(job id, next_run_time ISO-8601 string)` tuples.
        """
        session = database.get_db_session()
        rows = session.execute("SELECT job_state FROM apscheduler_jobs;")

        # Each row is a 1-tuple holding a pickled dict; unpickle lazily and
        # keep only the two fields that go into the message.
        job_states = (pickle.loads(blob) for (blob,) in rows)
        update_times = [
            (state['id'], state['next_run_time'].isoformat())
            for state in job_states
        ]

        database.delete_db_session()

        return pack_message("system-update-times", {"update-times": update_times})
def exposed_raw_test_retrieve(url):
	'''
	Lower level fetch test: push a single URL through a `RawSiteArchiver`.

	The `RawWebPages` row for `url` is reset to state 'new' if it exists, or
	created (and added to the session) if it does not. The row is then handed
	to `RawSiteArchiver.do_job()`. Exceptions from building or running the
	archiver are printed as a traceback rather than raised, and
	`db.delete_db_session()` is called afterwards either way.
	'''

	# try:
	# 	WebMirror.SpecialCase.startAmqpFetcher()
	# except RuntimeError:  # Fetcher already started
	# 	pass

	url_parts = urllib.parse.urlparse(url)
	# Scheme + netloc only: path, params, query and fragment are blanked.
	site_root = urllib.parse.urlunparse((url_parts.scheme, url_parts.netloc, "", "", "", ""))

	session = db.get_db_session()

	page = session.query(db.RawWebPages).filter(db.RawWebPages.url == url).scalar()
	if not page:
		page = db.RawWebPages(
			url       = url,
			starturl  = site_root,
			netloc    = url_parts.netloc,
			distance  = 50000,
			priority  = 500000,
			state     = 'new',
			fetchtime = datetime.datetime.now(),
			)
		session.add(page)
	else:
		page.state = 'new'

	try:
		archiver = RawArchiver.RawEngine.RawSiteArchiver(
			total_worker_count = 1,
			worker_num         = 0,
			new_job_queue      = None,
			cookie_lock        = None,
			db_interface       = session,
			response_queue     = None
			)
		archiver.do_job(page)
	except Exception:
		traceback.print_exc()
	finally:
		db.delete_db_session()
示例#14
0
	def get_times(self):
		'''
		Read every `job_state` value from the `apscheduler_jobs` table and
		return a packed "system-update-times" message.

		The message payload is a dict with one key, "update-times", holding a
		list of `(job id, next_run_time ISO-8601 string)` tuples.
		'''
		session = database.get_db_session()
		rows = session.execute("SELECT job_state FROM apscheduler_jobs;")

		# Each row is a 1-tuple holding a pickled dict; unpickle lazily and
		# keep only the two fields that go into the message.
		job_states = (pickle.loads(blob) for (blob,) in rows)
		update_times = [
			(state['id'], state['next_run_time'].isoformat())
			for state in job_states
		]

		database.delete_db_session()

		return pack_message("system-update-times", {"update-times" : update_times})
示例#15
0
def dump_scheduled_jobs(sched):
	'''
	Print a diagnostic summary to stdout, in three sections:

	- the jobs returned by `sched.get_jobs()`, each with its args, the time
	  remaining until `next_run_time` (relative to the current UTC time) and
	  its id;
	- the `PluginStatus` rows currently flagged `is_running`;
	- the threads reported by `threading.enumerate()`.

	`db.delete_db_session()` is called once the output is done, including
	when one of the queries or prints raises.
	'''
	print("Scheduled jobs:")
	existing = sched.get_jobs()
	if not existing:
		print("	No jobs in scheduler!")

	tznow = datetime.datetime.now(tz=pytz.utc)
	for job in existing:
		# NOTE(review): assumes `job.next_run_time` is never None here - confirm
		# for paused jobs, where the subtraction would raise TypeError.
		print("	", job, job.args, "running in:", job.next_run_time - tznow, (job.id, ))

	session = db.get_db_session()
	try:
		# `== True` is intentional: it builds a SQL expression on the column.
		running = session.query(db.PluginStatus).filter(db.PluginStatus.is_running == True).all()
		print("Running jobs:")
		for jitem in running:
			print("	", jitem.plugin_name, jitem.is_running, jitem.last_run, jitem.last_error, jitem.last_error_msg)
		if not running:
			print("	<None!>")

		print("Running threads:")
		for thread in threading.enumerate():
			# `Thread.getName()` is deprecated since Python 3.10; `.name` is the
			# equivalent attribute on all versions.
			print("	", thread.name, thread)
	finally:
		db.delete_db_session()
def exposed_fetch(url, debug=True, rss_debug=False):
	'''
	Do a synchronous fetch of content from url `url`.

	When `rss_debug` is set, `flags.RSS_DEBUG` is switched on first. When
	`debug` is set, a `WebPages` object describing the URL is printed; that
	object is only printed, never added to a session here.

	Exceptions from building the archiver or from the fetch are printed as a
	traceback rather than raised, and `db.delete_db_session()` is called
	afterwards either way.
	'''

	# try:
	# 	WebMirror.SpecialCase.startAmqpFetcher()
	# except RuntimeError:  # Fetcher already started
	# 	pass

	if rss_debug:
		print("Debugging RSS")
		flags.RSS_DEBUG = True

	url_parts = urllib.parse.urlparse(url)
	# Scheme + netloc only: path, params, query and fragment are blanked.
	site_root = urllib.parse.urlunparse((url_parts.scheme, url_parts.netloc, "", "", "", ""))

	page = db.WebPages(
		url       = url,
		starturl  = site_root,
		netloc    = url_parts.netloc,
		distance  = 50000,
		is_text   = True,
		priority  = 500000,
		type      = 'unknown',
		fetchtime = datetime.datetime.now(),
		)

	if debug:
		print(page)

	try:
		archiver = SiteArchiver(None, db.get_db_session(), None)
		archiver.synchronousJobRequest(url, ignore_cache=True)
	except Exception:
		traceback.print_exc()
	finally:
		db.delete_db_session()
示例#17
0
	def __init__(self, job_name):
		'''
		Bind this instance to the callable registered for `job_name` and make
		sure a `PluginStatus` row exists for it.

		Raises `JobNameException` when `job_name` is not a key of
		`CALLABLE_LUT`. An `OperationalError` or `InvalidRequestError` from
		SQLAlchemy during the row check/insert causes a rollback and is not
		re-raised. `db.delete_db_session()` is called afterwards in every case.
		'''

		if job_name not in CALLABLE_LUT:
			raise JobNameException("Callable '%s' is not in the class lookup table: '%s'!" % (job_name, CALLABLE_LUT))

		self.job_name = job_name
		self.runModule = CALLABLE_LUT[job_name]

		sess = db.get_db_session()

		try:
			existing = sess.query(db.PluginStatus).filter(db.PluginStatus.plugin_name==job_name).scalar()
			if not existing:
				# No status row for this plugin yet: create one.
				sess.add(db.PluginStatus(plugin_name=job_name))
				sess.commit()
		except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.InvalidRequestError):
			sess.rollback()
		finally:
			db.delete_db_session()
示例#18
0
	def __init__(self, job_name):
		'''
		Bind this instance to the callable registered for `job_name` and make
		sure a `PluginStatus` row exists for it.

		Raises `JobNameException` when `job_name` is not a key of
		`CALLABLE_LUT`. An `OperationalError` or `InvalidRequestError` from
		SQLAlchemy during the row check/insert causes a rollback and is not
		re-raised. `db.delete_db_session()` is called afterwards in every case.
		'''

		if job_name not in CALLABLE_LUT:
			raise JobNameException("Callable '%s' is not in the class lookup table: '%s'!" % (job_name, CALLABLE_LUT))

		self.job_name = job_name
		self.runModule = CALLABLE_LUT[job_name]

		sess = db.get_db_session()

		try:
			existing = sess.query(db.PluginStatus).filter(db.PluginStatus.plugin_name==job_name).scalar()
			if not existing:
				# No status row for this plugin yet: create one.
				sess.add(db.PluginStatus(plugin_name=job_name))
				sess.commit()
		except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.InvalidRequestError):
			sess.rollback()
		finally:
			db.delete_db_session()
示例#19
0
	def __del__(self):
		'''Call `db.delete_db_session()` when this object is finalized.'''
		db.delete_db_session()
示例#20
0
	def __del__(self):
		'''Call `db.delete_db_session()` when this object is finalized.'''
		db.delete_db_session()