def get_context_data(self, **kwargs):
    """Assemble the template context for the export preview page."""
    context = super(ExportPreviewView, self).get_context_data(**kwargs)
    results = self.object_list
    has_results = len(results) > 0

    # Search parameters, with '*' wildcards shown when no filter was given.
    context['itms'] = results
    context['from_date'] = self.d_from
    context['to_date'] = self.d_to
    context['kw'] = self.kw if len(self.kw) > 0 else '*'
    context['kw_placeholder'] = results[0]['query']['q'] if has_results else ''
    context['domain'] = self.domain if len(self.domain) > 0 else '*'
    context['domain_placeholder'] = results[0]['domain'] if has_results else ''
    context['scheduled'] = self.scheduled
    context['total_results'] = self.total_results
    # Never advertise more rows than actually exist.
    context['n'] = min(self.n, self.total_results)

    # Shared page chrome.
    context['common'] = {
        'title': 'Export data',
        'current_menu_item': 3,
        'user': self.request.session['user'],
        'version': CustomAppSettings.get_version(),
    }
    return context
def get_context_data(self, **kwargs):
    """Assemble the template context for the scheduled-jobs listing page."""
    context = super(ScheduledJobsView, self).get_context_data(**kwargs)
    # object_list is a 3-tuple prepared by the view.
    max_allowed_jobs, active_jobs, inactive_jobs = self.object_list
    selected_frequency, frequencies = Frequency().get_context_friendly_frequencies()

    context.update({
        # Active jobs are listed first, then inactive ones.
        'itms': active_jobs + inactive_jobs,
        'max_allowed_jobs': max_allowed_jobs,
        'active_jobs_count': len(active_jobs),
        'inactive_jobs_count': len(inactive_jobs),
        'is_new': False,
        'selected_frequency': selected_frequency,
        'frequencies': frequencies,
        'job_form_action': './',
        # Shared page chrome.
        'common': {
            'title': 'Scheduled jobs',
            'current_menu_item': 2,
            'user': self.request.session['user'],
            'version': CustomAppSettings.get_version(),
        },
    })
    return context
def get_context_data(self, **kwargs):
    """Assemble the template context for the public index (top ranking) page.

    The stats / top-sites rendering that used to live here is currently
    disabled; only the page chrome and the top-N count are provided.
    """
    context = super(IndexView, self).get_context_data(**kwargs)
    # Browse as the shared "vissbl" account — no login on the index page.
    current_user = self.get_user(CustomAppSettings.get_vissbl_user_id())

    context['top_count'] = 30
    context['common'] = {
        'title': 'Vissbl - real online ranking',
        'current_menu_item': 0,
        'user': current_user,
        'version': CustomAppSettings.get_version(),
    }
    return context
def get_context_data(self, **kwargs):
    """Assemble the template context for the scheduled-task monitor page."""
    context = super(MonitorView, self).get_context_data(**kwargs)

    # Show tasks from midnight (UTC) of the current day onwards.
    # NOTE(review): naive UTC datetime, matching the rest of the codebase.
    today_utc = datetime.datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0)

    context['itms'] = self.object_list
    context['from_date'] = today_utc
    context['total_results'] = self.total_results
    # Never advertise more rows than actually exist.
    context['n'] = min(self.n, self.total_results)
    # Shared page chrome.
    context['common'] = {
        'title': 'Monitor scheduled tasks',
        'current_menu_item': 1,
        'user': self.request.session['user'],
        'version': CustomAppSettings.get_version(),
    }
    return context
def get_context_data(self, **kwargs):
    """Assemble the template context for the user management console.

    Loads every user profile from the Mongo `webscr_profiles.users`
    collection and exposes them to the template along with the shared
    page chrome.
    """
    context = super(AdminConsoleView, self).get_context_data(**kwargs)
    # Get a handle on the profiles database.
    cl = CustomAppSettings.get_mongo()
    db = cl.webscr_profiles
    # Current user comes from the session.
    user = self.request.session['user']

    context['common'] = dict()
    context['common']['title'] = 'User management console'
    # BUG FIX: this view used the key 'current_menu_title'; every other view
    # in this file sets 'current_menu_item', which is what the templates
    # read, so the admin entry was never highlighted in the menu.
    context['common']['current_menu_item'] = 4
    context['common']['user'] = user
    context['common']['version'] = CustomAppSettings.get_version()

    # Expose each profile with a template-friendly 'id' (Django templates
    # cannot access the leading-underscore '_id') and an is_admin flag.
    profiles = []
    for profile in db.users.find():
        profile['id'] = str(profile['_id'])
        profile['is_admin'] = profile['role'] == 'admin'
        profiles.append(profile)
    context['profiles'] = profiles
    return context
def get_context_data(self, **kwargs):
    """Assemble the template context for the login page.

    No authenticated user exists yet, so 'common' carries no 'user' key.
    """
    context = super(LoginView, self).get_context_data(**kwargs)
    context['common'] = {
        'title': 'Login',
        'current_menu_item': 3,
        'version': CustomAppSettings.get_version(),
    }
    # The login page itself must be reachable without a session.
    context['ignore_login'] = True
    return context
def get_context_data(self, **kwargs):
    """Assemble the template context for the user console page."""
    context = super(UserConsoleView, self).get_context_data(**kwargs)
    user = self.request.session['user']
    # _get_user_info() reads self.user, so set it before the call.
    self.user = user

    userinfo = self._get_user_info()
    # Pre-serialise the per-day request counts for the client-side chart;
    # json_util.default presumably handles BSON types (ObjectId, datetime)
    # in the data — verify against _get_user_info().
    userinfo['info']['requests_per_day_json'] = json.dumps(
        userinfo['info']['requests_per_day'], default=json_util.default)
    context['userinfo'] = userinfo

    # Shared page chrome.
    context['common'] = {
        'title': 'User console',
        'current_menu_item': 4,
        'user': user,
        'version': CustomAppSettings.get_version(),
    }
    return context
def get_context_data(self, **kwargs):
    """Assemble the template context for the main scraper (new job) page."""
    context = super(ScrapeView, self).get_context_data(**kwargs)
    session_user = self.request.session['user']
    # Full user object, needed for plan/points checks below.
    account = self.get_user()
    selected_frequency, frequencies = Frequency().get_context_friendly_frequencies()

    context.update({
        'is_new': True,
        'can_schedule': account.can_schedule_urls(),
        'selected_frequency': selected_frequency,
        'frequencies': frequencies,
        'job_form_action': './jobs/',
        'has_points': account.has_points(),
        'is_active': True,
        # Shared page chrome.
        'common': {
            'title': 'Webscraper',
            'current_menu_item': 0,
            'user': session_user,
            'version': CustomAppSettings.get_version(),
        },
    })
    return context
def scrape(self, url, return_data):
    """Scrape a single Google results URL through the proxy service.

    NOTE(review): this is Python 2 code (`urlparse`/`urllib.quote_plus`
    modules, str/bytes mixing after `url.encode('utf-8')`).

    :param url: search-results URL to fetch (scheme may be missing).
    :param return_data: when True, the scraped results are also handed
        back to the caller as base64-encoded JSON.
    :return: dict with 'url' and 'group_name' (plus 'domain' and
        'b64_json' when return_data is truthy).
    """
    current_task = self.current_task
    logger = self.logger
    # Work with a UTF-8 byte string from here on (Python 2 semantics).
    url = url.encode('utf-8')
    # Change task status so the monitor UI can show progress.
    current_task.update_state(state=u'STARTED', meta={'url': url, 'group': self.group_name})
    logger.info('TASK EXECUTING: %r, args: %r kwargs: %r' % (
        self.current_task.request.id, self.current_task.request.args,
        self.current_task.request.kwargs))
    # Avoid doing anything if the url is empty.
    if len(url) == 0:
        self._wrong_param_exception(url)
    # Parse the url; add a scheme if missing so netloc parses correctly.
    u = urlparse.urlparse(url)
    if u.scheme == '':
        url = 'http://' + url
        u = urlparse.urlparse(url)
    netloc = u.netloc
    # Parse the query string into a dict of value lists.
    qs = urlparse.parse_qs(u.query)
    # Result offset ('start' param), defaulting to 0.
    start = (qs['start'] if 'start' in qs else ['0'])[0]
    start = int(start)
    # Flatten each query-string value from a list to a plain string.
    for k in qs.keys():
        el = qs[k]
        el = ' '.join(el)
        # 'num' and 'start' should be numeric; best-effort int conversion.
        new_k = k.lower()
        if new_k == 'num' or new_k == 'start':
            el = el.replace(' ', '')
            try:
                el = int(el)
            except Exception:
                pass
        qs[k] = el
    # Defaults for num and start if the url carried neither.
    if not 'num' in qs:
        qs['num'] = 10
    if not 'start' in qs:
        qs['start'] = 0
    # Registrable domain of the query target (e.g. 'google.co.uk').
    psl = PublicSuffixList()
    query_domain = psl.get_public_suffix(netloc)
    # Only Google domains are actually scraped.
    parts = query_domain.split(u'.')
    scraped_docs = ''
    if len(parts) > 0 and parts[0].upper() == u'GOOGLE':
        current_task.update_state(state=u'CONNECTING', meta={'url': url, 'group': self.group_name})
        # Build the proxy request payload.
        req = {
            'url': urllib.quote_plus(url, "%/:=&?~#+!$,;'@()*[]"),
            'referer': u'http://google.com',
            'useragent': u'Webscraper/' + CustomAppSettings.get_version() + ' (+http://www.yriver.it/webscraper/)',  #'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.47 Safari/537.36',
            'region': u'gb',
            'priority': u'1'
        }
        # Special plans use a dedicated scrape key.
        scrape_key = get_scrape_key_special if self.plan['is_special'] else get_scrape_key
        # Make the query through the proxy.
        query = {'key': scrape_key(), 'request': req}
        p = ProxyConnection(self.user, self.plan)
        html = p.send_request(query)
        # Parse the html unless the proxy signalled failure ('' or '0').
        scraped_docs = ''
        if len(html) > 0 and html != '0':
            scraped_docs, tot_res_est = self._parse_html(html)
        # Write results into the db.
        # NOTE(review): if the proxy returned '' or '0', tot_res_est is
        # never bound and this call raises NameError — confirm whether the
        # write should sit inside the html check instead.
        self._db_write_res(task_id=current_task.request.id, url=url,
                           group_name=self.group_name, results=scraped_docs,
                           tot_res=tot_res_est, start=start, query=qs,
                           domain=query_domain)
    # Convert to Base64 if return_data is True.
    if return_data:
        encoded_result = base64.standard_b64encode(json.dumps(scraped_docs))
        return {'url': url, 'group_name': self.group_name, 'domain': query_domain, 'b64_json': encoded_result}
    else:
        return {'url': url, 'group_name': self.group_name}