Пример #1
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the export preview page.

		Extends the parent context with the current result list
		(``self.object_list``), the filter values stored on the view
		(``d_from``, ``d_to``, ``kw``, ``domain``, ``scheduled``), the result
		counters and the ``common`` dict (title, menu item, user, version).
		"""
		context = super(ExportPreviewView, self).get_context_data(**kwargs)

		results = self.object_list

		# The current user is read from the session.
		current_user = self.request.session['user']

		context['itms'] = results
		context['from_date'] = self.d_from
		context['to_date'] = self.d_to

		# An empty keyword is exposed as the '*' wildcard.
		if len(self.kw) > 0:
			context['kw'] = self.kw
		else:
			context['kw'] = '*'

		# Placeholders are taken from the first result, when there is one.
		has_results = len(results) > 0
		if has_results:
			context['kw_placeholder'] = results[0]['query']['q']
		else:
			context['kw_placeholder'] = ''

		# An empty domain is exposed as the '*' wildcard.
		if len(self.domain) > 0:
			context['domain'] = self.domain
		else:
			context['domain'] = '*'

		if has_results:
			context['domain_placeholder'] = results[0]['domain']
		else:
			context['domain_placeholder'] = ''

		context['scheduled'] = self.scheduled

		# 'n' never exceeds the total number of results.
		context['total_results'] = self.total_results
		if self.total_results > self.n:
			context['n'] = self.n
		else:
			context['n'] = self.total_results

		context['common'] = {
			'title': 'Export data',
			'current_menu_item': 3,
			'user': current_user,
			'version': CustomAppSettings.get_version(),
		}
		return context
Пример #2
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the scheduled jobs page.

		``self.object_list`` is unpacked into the maximum number of allowed
		jobs, the active jobs and the inactive jobs. The context also carries
		the frequency choices for the job form and the ``common`` dict.
		"""
		context = super(ScheduledJobsView, self).get_context_data(**kwargs)

		job_limit, running, stopped = self.object_list

		# The current user is read from the session.
		current_user = self.request.session['user']

		selected, choices = Frequency().get_context_friendly_frequencies()

		# Job listing: active jobs first, then inactive ones.
		context['itms'] = running + stopped
		context['max_allowed_jobs'] = job_limit
		context['active_jobs_count'] = len(running)
		context['inactive_jobs_count'] = len(stopped)

		# Values consumed by the job form.
		context['is_new'] = False
		context['selected_frequency'] = selected
		context['frequencies'] = choices
		context['job_form_action'] = './'

		context['common'] = {
			'title': 'Scheduled jobs',
			'current_menu_item': 2,
			'user': current_user,
			'version': CustomAppSettings.get_version(),
		}
		return context
Пример #3
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the index page.

		The user is looked up through ``self.get_user`` with the id returned
		by ``CustomAppSettings.get_vissbl_user_id()``. Only ``top_count`` and
		the ``common`` dict are added to the parent context.
		"""
		context = super(IndexView, self).get_context_data(**kwargs)

		vissbl_user_id = CustomAppSettings.get_vissbl_user_id()
		current_user = self.get_user(vissbl_user_id)

		# NOTE: the ranking statistics ('top_sites_json' / 'itms') that used
		# to be derived from the user's stats object are not populated here.

		context['top_count'] = 30

		context['common'] = {
			'title': 'Vissbl - real online ranking',
			'current_menu_item': 0,
			'user': current_user,
			'version': CustomAppSettings.get_version(),
		}
		return context
Пример #4
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the task monitor page.

		Adds the current result list, ``from_date`` (today at 00:00:00 UTC),
		the result counters and the ``common`` dict to the parent context.
		"""
		context = super(MonitorView, self).get_context_data(**kwargs)

		results = self.object_list

		# Start of the current UTC day.
		day_start = datetime.datetime.utcnow().replace(
			hour=0, minute=0, second=0, microsecond=0)

		# The current user is read from the session.
		current_user = self.request.session['user']

		context['itms'] = results
		context['from_date'] = day_start

		# 'n' never exceeds the total number of results.
		context['total_results'] = self.total_results
		if self.total_results > self.n:
			context['n'] = self.n
		else:
			context['n'] = self.total_results

		context['common'] = {
			'title': 'Monitor scheduled tasks',
			'current_menu_item': 1,
			'user': current_user,
			'version': CustomAppSettings.get_version(),
		}
		return context
Пример #5
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the user management console.

		Loads every document from the ``users`` collection of the
		``webscr_profiles`` Mongo database and exposes the documents as
		``profiles``. Each profile gets two extra keys:

		* ``id`` -- string form of ``_id``
		* ``is_admin`` -- True when the profile's ``role`` is ``'admin'``

		The ``common`` dict (title, menu item, user, version) is filled in
		the same way as in the other views.
		"""
		context = super(AdminConsoleView, self).get_context_data(**kwargs)

		# Handle to the profiles database.
		cl = CustomAppSettings.get_mongo()
		db = cl.webscr_profiles

		# The current user is read from the session.
		user = self.request.session['user']

		context['common'] = dict()
		context['common']['title'] = 'User management console'
		# The other views publish this value as 'current_menu_item'. This
		# view historically used 'current_menu_title', so both keys are set:
		# the shared key for consistency, the old one for any template that
		# still reads it.
		context['common']['current_menu_item'] = 4
		context['common']['current_menu_title'] = 4
		context['common']['user'] = user

		profiles = []
		for profile in db.users.find():
			# Expose the id under a second key ('_id' can't be used, per the
			# original author's note).
			profile['id'] = str(profile['_id'])
			# A document without a 'role' is treated as a non-admin user
			# instead of raising KeyError and breaking the whole page.
			profile['is_admin'] = profile.get('role') == 'admin'
			profiles.append(profile)

		context['profiles'] = profiles
		context['common']['version'] = CustomAppSettings.get_version()

		return context
Пример #6
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the login page.

		Adds the ``common`` dict (title, menu item, version; no user entry)
		and sets ``ignore_login`` to True.
		"""
		context = super(LoginView, self).get_context_data(**kwargs)

		context['common'] = {
			'title': 'Login',
			'current_menu_item': 3,
			'version': CustomAppSettings.get_version(),
		}

		# Flag read by the templates for the login page.
		context['ignore_login'] = True

		return context
Пример #7
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the user console page.

		Stores the session user on ``self.user``, fetches the user info via
		``self._get_user_info()`` and adds a JSON-serialised copy of its
		``requests_per_day`` entry (``requests_per_day_json``) before exposing
		it as ``userinfo``.
		"""
		context = super(UserConsoleView, self).get_context_data(**kwargs)

		# The current user is read from the session and kept on the view;
		# it is assigned before _get_user_info() is called.
		current_user = self.request.session['user']
		self.user = current_user

		userinfo = self._get_user_info()
		info = userinfo['info']
		info['requests_per_day_json'] = json.dumps(
			info['requests_per_day'], default=json_util.default)

		context['userinfo'] = userinfo

		context['common'] = {
			'title': 'User console',
			'current_menu_item': 4,
			'user': current_user,
			'version': CustomAppSettings.get_version(),
		}

		return context
Пример #8
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the scrape (new job) page.

		Adds the job form values (frequency choices, form action, flags taken
		from the object returned by ``self.get_user()``) and the ``common``
		dict to the parent context.
		"""
		context = super(ScrapeView, self).get_context_data(**kwargs)

		# The session user goes into the 'common' dict; the object returned
		# by get_user() is queried for the scheduling/points flags.
		session_user = self.request.session['user']
		account = self.get_user()

		selected, choices = Frequency().get_context_friendly_frequencies()

		# Values consumed by the job form.
		context['is_new'] = True
		context['can_schedule'] = account.can_schedule_urls()
		context['selected_frequency'] = selected
		context['frequencies'] = choices
		context['job_form_action'] = './jobs/'
		context['has_points'] = account.has_points()
		context['is_active'] = True

		context['common'] = {
			'title': 'Webscraper',
			'current_menu_item': 0,
			'user': session_user,
			'version': CustomAppSettings.get_version(),
		}
		return context
	def scrape(self, url, return_data):
		"""Scrape one search URL and store the parsed results.

		Steps:

		1. The URL is UTF-8 encoded and ``http://`` is prepended when it has
		   no scheme.
		2. The query string is flattened into a plain dict: multi-valued
		   parameters are joined with spaces, ``num``/``start`` are converted
		   to ints when possible and default to 10 and 0 when absent.
		3. The page is requested through ``ProxyConnection`` only when the
		   first label of the domain returned by
		   ``PublicSuffixList.get_public_suffix`` is ``google``. A non-empty
		   response other than ``'0'`` is parsed with ``_parse_html`` and
		   written with ``_db_write_res``.

		Task progress is reported through ``self.current_task.update_state``
		(``STARTED``, then ``CONNECTING`` before the request is sent).

		Args:
			url: the URL to scrape. An empty URL is passed to
				``self._wrong_param_exception`` (presumably raises -- confirm
				against that helper).
			return_data: when true, the scraped documents are serialised to
				JSON, Base64-encoded and included in the returned dict.

		Returns:
			``{'url', 'group_name'}``, plus ``'domain'`` and ``'b64_json'``
			when ``return_data`` is true. When nothing was scraped the
			encoded payload is the JSON form of an empty string.
		"""
		current_task = self.current_task
		logger = self.logger

		url = url.encode('utf-8')

		# Change status.
		current_task.update_state(state=u'STARTED', meta={'url': url, 'group': self.group_name})

		# Arguments are passed separately so the message is only formatted
		# when the record is actually emitted.
		logger.info('TASK EXECUTING: %r, args: %r kwargs: %r', current_task.request.id,
			current_task.request.args, current_task.request.kwargs)

		# Avoid doing anything if the URL is empty.
		if not url:
			self._wrong_param_exception(url)

		# Parse the URL; re-parse only when a scheme had to be added.
		u = urlparse.urlparse(url)
		if u.scheme == '':
			url = 'http://' + url
			u = urlparse.urlparse(url)

		netloc = u.netloc

		# parse_qs maps every parameter to a list of values.
		qs = urlparse.parse_qs(u.query)

		# Result offset: first 'start' value, 0 when missing. A non-numeric
		# value falls back to 0 instead of aborting the task, matching the
		# tolerant conversion applied to the query dict below.
		try:
			start = int(qs.get('start', ['0'])[0])
		except ValueError:
			start = 0

		# Convert qs values from lists to plain strings.
		for k in list(qs):
			el = ' '.join(qs[k])

			# Try to convert number strings into numbers.
			if k.lower() in ('num', 'start'):
				el = el.replace(' ', '')
				try:
					el = int(el)
				except ValueError:
					# Not a number: keep the whitespace-stripped string.
					pass

			qs[k] = el

		# Add default values for num and start if there are none.
		qs.setdefault('num', 10)
		qs.setdefault('start', 0)

		# Get the domain name of the query.
		psl = PublicSuffixList()
		query_domain = psl.get_public_suffix(netloc)

		# Only Google result pages are scraped.
		parts = query_domain.split(u'.')

		scraped_docs = ''
		if len(parts) > 0 and parts[0].upper() == u'GOOGLE':
			current_task.update_state(state=u'CONNECTING', meta={'url': url, 'group': self.group_name})

			# Create the request.
			req = {
				'url': urllib.quote_plus(url, "%/:=&?~#+!$,;'@()*[]"),
				'referer': u'http://google.com',
				'useragent': u'Webscraper/' + CustomAppSettings.get_version() + ' (+http://www.yriver.it/webscraper/)',
				'region': u'gb',
				'priority': u'1'
			}

			# Special plans use a different scrape key.
			scrape_key = get_scrape_key_special if self.plan['is_special'] else get_scrape_key

			# Make the query.
			query = {'key': scrape_key(), 'request': req}

			p = ProxyConnection(self.user, self.plan)
			html = p.send_request(query)

			# An empty response or the literal '0' is not parsed or stored.
			if len(html) > 0 and html != '0':
				scraped_docs, tot_res_est = self._parse_html(html)
				# Write into the db.
				self._db_write_res(task_id=current_task.request.id, url=url, group_name=self.group_name,
						results=scraped_docs, tot_res=tot_res_est, start=start,
						query=qs, domain=query_domain)

		# Convert to Base64 if return_data is true.
		if return_data:
			encoded_result = base64.standard_b64encode(json.dumps(scraped_docs))
			return {'url': url, 'group_name': self.group_name, 'domain': query_domain, 'b64_json': encoded_result}
		else:
			return {'url': url, 'group_name': self.group_name}