示例#1
0
	def add_url(self, request):

		''' Add a URL to the crawl routine.

		Stores a QueuedURL entity whose key id is ``request.url``. When
		``request.parent`` is set it is decoded as a urlsafe key string and
		used as the parent of the new entity's key.

		Returns an AddURLResponse with ``added=True``, ``newcount`` set to the
		result of ``QueuedURL.query().count()`` after the put, and
		``queued_url`` set to a QueuedURLMessage describing the stored entity.
		'''

		logging.info('========== ADD_URL ==========')

		if request.parent is not None:
			logging.info('URL has parent.')
			# Nest the new key under the decoded parent key. The key must be
			# built from `p`: `k` is not bound until put() has returned, so
			# referencing it here raises UnboundLocalError.
			p = ndb.Key(urlsafe=request.parent)
			url_key = ndb.Key(QueuedURL, request.url, parent=p)
		else:
			logging.info('URL has NO parent.')
			url_key = ndb.Key(QueuedURL, request.url)

		# Entity creation and storage are identical for both branches.
		q = QueuedURL(key=url_key, url=request.url)
		k = q.put()

		logging.info('QueuedURL key: '+k.urlsafe())

		c = QueuedURL.query().count()

		logging.info('New URL count: '+str(c))

		## Generate response
		qu = QueuedURLMessage(key=k.urlsafe())
		qu.url = request.url
		qu.started = q.started

		# Parent fields are only filled in for child entities.
		parent_key = q.key.parent()
		if parent_key is not None:
			qu.parent = parent_key.urlsafe()
			qu.parent_url = parent_key.id_or_name()

		qu.modified = str(q.modified)
		qu.created = str(q.created)

		# Optional fields are copied only when they hold a truthy value.
		if q.status_code:
			qu.status_code = q.status_code
		if q.task_id:
			qu.task_id = q.task_id

		return AddURLResponse(added=True, newcount=c, queued_url=qu)
示例#2
0
def add_url(key_or_url, urlparent=None, start=False):

	''' Creates a QueuedURL entity and optionally enqueues a task to scrape.

	key_or_url -- a URL string, an ndb.Key, or an ndb.Model instance.
	urlparent  -- optional parent for a new entity (URL-string input only):
	              an ndb.Model instance, an ndb.Key, or a value accepted by
	              ndb.Key(urlsafe=...).
	start      -- passed through to get_task(); for URL-string input the
	              task name is recorded on the entity only when this is true.

	Returns a 2-tuple:
	  * URL string: (result of get_task(), new QueuedURL entity)
	  * Key / Model, entity not started: (result of get_task(), entity)
	  * Key / Model, entity already started: (entity.task_id, entity)
	Returns None when key_or_url is none of the supported types.

	Raises ValueError when given a Key for which no entity is stored.

	NOTE(review): task_id is assigned on the entity but not written back
	with put() here -- presumably the caller persists it; confirm.
	'''

	if isinstance(key_or_url, basestring):
		url = key_or_url

		if urlparent is not None:
			# Normalise the parent to an ndb.Key before using it.
			if isinstance(urlparent, ndb.Model):
				parent_key = urlparent.key
			elif isinstance(urlparent, ndb.Key):
				parent_key = urlparent
			else:
				# Presumably a urlsafe-encoded key string; ndb.Key() given a
				# single positional argument is rejected -- TODO confirm
				# against callers.
				parent_key = ndb.Key(urlsafe=urlparent)
			entity_key = ndb.Key('QueuedURL', url, parent=parent_key)
		else:
			entity_key = ndb.Key('QueuedURL', url)

		# The parent is already part of entity_key; the model constructor
		# does not accept parent= together with key=.
		q = QueuedURL(key=entity_key, url=url, started=False)
		qk = q.put()

		t = get_task(qk, start)
		if start:
			# The task id belongs on the entity, not on its key.
			q.task_id = t.name

		return t, q

	elif isinstance(key_or_url, (ndb.Key, ndb.Model)):

		if isinstance(key_or_url, ndb.Key):
			# Already a Key (not a urlsafe string): fetch the entity directly.
			qu = key_or_url.get()
			if qu is None:
				raise ValueError('No QueuedURL entity stored for key %r' % (key_or_url,))
		else:
			qu = key_or_url

		if qu.started:
			# Already started: hand back the recorded task id.
			return qu.task_id, qu

		t = get_task(qu.key, start)
		qu.task_id = t.name
		return t, qu

	# Unsupported input type: same result as falling off the end.
	return None