Пример #1
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the user management console.

		Adds a ``common`` dict (title, menu index, session user, application
		version) and a ``profiles`` list holding every document found in
		``webscr_profiles.users``. Each profile gets two extra keys: ``id``
		(string form of ``_id``) and ``is_admin``.
		"""
		context = super(AdminConsoleView, self).get_context_data(**kwargs)

		#profiles are read from the webscr_profiles database
		cl = CustomAppSettings.get_mongo()
		db = cl.webscr_profiles

		#get current user
		user = self.request.session['user']

		#populate context
		context['common'] = {
			'title': 'User management console',
			#original key of this view, kept so existing readers keep working
			'current_menu_title': 4,
			#key name used by the sibling views' get_context_data
			'current_menu_item': 4,
			'user': user,
		}

		profiles = []
		for profile in db.users.find():
			#create id attribute since _id can't be used
			profile['id'] = str(profile['_id'])
			#a profile without a role is treated as a non-admin instead of raising
			profile['is_admin'] = profile.get('role') == 'admin'
			profiles.append(profile)

		context['profiles'] = profiles
		context['common']['version'] = CustomAppSettings.get_version()

		return context
Пример #2
0
	def get_context_data(self, **kwargs):
		"""Assemble the template context for the index page.

		Sets ``top_count`` and a ``common`` dict with the page title, menu
		index, the user resolved from the configured Vissbl user id, and the
		application version.
		"""
		context = super(IndexView, self).get_context_data(**kwargs)

		# User the page is rendered for
		vissbl_user = self.get_user(CustomAppSettings.get_vissbl_user_id())

		# NOTE: the ranking stats (top_sites_json / itms) that used to be
		# added here are disabled in the original code and stay disabled.
		context['top_count'] = 30

		context['common'] = {
			'title': 'Vissbl - real online ranking',
			'current_menu_item': 0,
			'user': vissbl_user,
			'version': CustomAppSettings.get_version(),
		}
		return context
Пример #3
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the scheduled-task monitor page.

		Exposes the object list as ``itms``, today's UTC midnight as
		``from_date``, the total number of results and the number of rows
		actually shown (``n``), plus the shared ``common`` dict.
		"""
		context = super(MonitorView, self).get_context_data(**kwargs)

		rows = self.object_list

		#start of the current day (UTC): the date from which to search
		day_start = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

		#current user from the session
		session_user = self.request.session['user']

		context['common'] = dict()
		context['itms'] = rows
		context['from_date'] = day_start
		context['total_results'] = self.total_results

		#rows shown can never exceed the total found
		if self.total_results > self.n:
			context['n'] = self.n
		else:
			context['n'] = self.total_results

		common = context['common']
		common['title'] = 'Monitor scheduled tasks'
		common['current_menu_item'] = 1
		common['user'] = session_user
		common['version'] = CustomAppSettings.get_version()
		return context
Пример #4
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the scheduled jobs page.

		``self.object_list`` is unpacked as (max allowed jobs, active jobs,
		inactive jobs); both job lists are concatenated into ``itms`` and
		their sizes are exposed separately.
		"""
		context = super(ScheduledJobsView, self).get_context_data(**kwargs)

		job_limit, active_list, inactive_list = self.object_list

		#current user from the session
		session_user = self.request.session['user']

		chosen_freq, all_freqs = Frequency().get_context_friendly_frequencies()

		#job listing and counters
		context.update({
			'itms': active_list + inactive_list,
			'max_allowed_jobs': job_limit,
			'active_jobs_count': len(active_list),
			'inactive_jobs_count': len(inactive_list),
		})

		#job form settings
		context.update({
			'is_new': False,
			'selected_frequency': chosen_freq,
			'frequencies': all_freqs,
			'job_form_action': './',
		})

		context['common'] = {
			'title': 'Scheduled jobs',
			'current_menu_item': 2,
			'user': session_user,
			'version': CustomAppSettings.get_version(),
		}
		return context
Пример #5
0
	def get_queryset(self):
		"""Return the newest scraped-url documents matching the current filter.

		Runs the query built by ``create_query_doc()`` against the ``urls``
		collection of the session user's database, sorted by ``date_scraped``
		descending and restricted to the summary fields listed below.

		Side effect: stores the cursor's ``count()`` (taken before the limit
		is applied) in ``self.total_results``.

		Returns:
			list with at most ``self.n`` documents.
		"""
		cl = CustomAppSettings.get_mongo()
		res = None

		try:
			query = self.create_query_doc()

			#get the user
			user = self.request.session['user']

			#query
			db = cl[user['db']]
			fields = {'date_scraped': 1, 'estimated_res': 1, 'task_id': 1, 'url': 1, 'query': 1, 'domain': 1,
				'scheduled': 1}
			res = db.urls.find(query, fields).sort([('date_scraped', -1)])

			#find total
			self.total_results = res.count()

			#convert to array
			queryset = list(res.limit(self.n))
		finally:
			#release cursor and client even when the query fails
			if res is not None:
				res.close()
			cl.close()

		return queryset
Пример #6
0
	def clean(self):
		""" Validate the form and make sure the secret key belongs to a profile """

		#run the regular form validation first
		cleaned_data = super(LoginForm, self).clean()

		#earlier validation already failed - nothing more to check
		if self._errors:
			return cleaned_data

		secret_key = cleaned_data.get('secret_key')

		#look the key up among the user profiles; only _id is fetched
		profiles_db = CustomAppSettings.get_mongo().webscr_profiles
		match = profiles_db.users.find_one({'secret_key': secret_key}, {'_id': 1})

		if not match:
			#unknown key: attach the error to the field and drop its value
			self._errors['secret_key'] = self.error_class([u"Invalid secret key"])
			del cleaned_data['secret_key']
			return cleaned_data

		cleaned_data['profile_id'] = str(match['_id'])
		return cleaned_data
Пример #7
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the export preview page.

		Exposes the filter values (dates, keyword, domain, scheduled flag),
		the preview rows as ``itms``, result counters and the shared
		``common`` dict. An empty keyword or domain filter is shown as '*'.
		"""
		context = super(ExportPreviewView, self).get_context_data(**kwargs)

		rows = self.object_list

		#current user from the session
		session_user = self.request.session['user']

		#placeholders are taken from the first row when there is one
		if len(rows) > 0:
			kw_hint = rows[0]['query']['q']
			domain_hint = rows[0]['domain']
		else:
			kw_hint = ''
			domain_hint = ''

		context['common'] = dict()
		context['itms'] = rows

		context['from_date'] = self.d_from
		context['to_date'] = self.d_to

		if len(self.kw) > 0:
			context['kw'] = self.kw
		else:
			context['kw'] = '*'
		context['kw_placeholder'] = kw_hint

		if len(self.domain) > 0:
			context['domain'] = self.domain
		else:
			context['domain'] = '*'
		context['domain_placeholder'] = domain_hint

		context['scheduled'] = self.scheduled

		context['total_results'] = self.total_results
		#rows shown can never exceed the total found
		if self.total_results > self.n:
			context['n'] = self.n
		else:
			context['n'] = self.total_results

		common = context['common']
		common['title'] = 'Export data'
		common['current_menu_item'] = 3
		common['user'] = session_user
		common['version'] = CustomAppSettings.get_version()
		return context
Пример #8
0
	def generate_secret_key(self):
		"""Create a new secret key and store it on this user's profile document.

		Returns:
			tuple ``(secret_key, result)`` where ``result`` is whatever the
			``users.update`` call returned.
		"""
		new_key = CustomAppSettings.get_new_secret_key()

		# Write the key into the profile (upsert)
		profiles_db = self.get_user_profiles_db()
		selector = {'_id': ObjectId(self.get_user_id())}
		change = {'$set': {'secret_key': new_key}}
		outcome = profiles_db.users.update(spec=selector, document=change, upsert=True)

		return new_key, outcome
Пример #9
0
	def query(self):
		"""Return the top results of the Vissbl user plus the latest ranks.

		Returns:
			dict with ``data`` (everything from ``get_top_results()``) and
			``last_date_ranks`` (first 30 ranks of the last entry in ``data``,
			or an empty list when ``data`` is empty).
		"""
		owner = self.get_user(CustomAppSettings.get_vissbl_user_id())

		# Get stats
		data = User(owner).get_user_stats_object().get_top_results()

		# Last entry carries the most recent ranks; fall back to an empty shell
		if len(data) > 0:
			latest = data[-1]
		else:
			latest = {'value': {'ranks': []}}

		return {"data": data, "last_date_ranks": latest['value']['ranks'][:30]}
	def _track_proxy_connection(self):
		"""Count one proxy call in the ``calls`` collection of ``self._db``.

		The counter document is keyed by the current UTC time split into
		year/month/day/hour/minute/second, so all calls made within the same
		second increment the same document (created on first use via upsert).
		"""
		d = datetime.datetime.utcnow()

		#increase calls count
		self._db.calls.find_and_modify(
			query={'date.y': d.year, 'date.m': d.month, 'date.d': d.day, 'date.h': d.hour, 'date.n': d.minute, 'date.s': d.second},
			update={'$inc': {'count': 1}},
			upsert=True)
Пример #11
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the login page.

		Sets the shared ``common`` dict (without a user entry) and the
		``ignore_login`` flag.
		"""
		context = super(LoginView, self).get_context_data(**kwargs)

		context['common'] = {
			'title': 'Login',
			'current_menu_item': 3,
			'version': CustomAppSettings.get_version(),
		}

		#ignore login
		context['ignore_login'] = True

		return context
Пример #12
0
	def get_redirect_url(self, *args, **kwargs):
		""" Increment is_active of the indicated non-admin user if current user is Admin, then redirect to the user console """

		profiles_db = CustomAppSettings.get_mongo().webscr_profiles

		caller = self.request.session['user']
		if caller['is_admin']:
			#the role filter keeps admin profiles from being touched
			selector = {'_id': ObjectId(kwargs['profile_id']), 'role': {'$ne': 'admin'}}
			change = {
				'$inc': {'is_active': 1},
				#remember when the activation state was last changed
				'$set': {'actdeact': datetime.datetime.utcnow()},
			}
			profiles_db.users.update(selector, change)

		return reverse('webscraper:userconsole')
Пример #13
0
	def get_context_data(self, **kwargs):
		"""Build the context for the preview of a single task's results.

		``itms`` is the ``results`` field of the url document whose
		``task_id`` equals ``self.taskid``; when no document is found the
		falsy lookup result is stored unchanged.
		"""
		context = super(ExportPreviewTaskView, self).get_context_data(**kwargs)

		client = CustomAppSettings.get_mongo()
		session_user = self.request.session['user']

		#only the results field of the matching document is fetched
		url_collection = client[session_user['db']].urls
		found = url_collection.find_one({'task_id': self.taskid}, {'results': 1})

		context['itms'] = found['results'] if found else found

		return context
Пример #14
0
	def do_edit(self, kwargs):
		"""Change the alias of a user profile on behalf of an admin.

		Args:
			kwargs: dict of URL keyword arguments; must contain ``profile_id``.

		Returns:
			'NOT ADMIN' when the session user is not an admin,
			'WRONG ALIAS' when the posted alias is missing or blank,
			'OK' once the update has been issued.
		"""
		#only admins may edit profiles
		if not self.request.session['user']['is_admin']:
			return 'NOT ADMIN'

		#a request without an "alias" field yields None; treat it as blank
		alias = self.request.POST.get('alias') or ''
		profile_id = kwargs['profile_id']

		#ignore if alias is empty
		if not alias.strip():
			return 'WRONG ALIAS'

		#do update
		cl = CustomAppSettings.get_mongo()
		db = cl.webscr_profiles
		db.users.update({'_id': ObjectId(profile_id)}, {'$set': {'alias': alias}})

		return 'OK'
	def maintain(self):
		"""Remove old task data for every active user and log what was handled.

		For each user the old task ids are removed from the user's
		``task_meta`` and ``urls`` collections and from ``celery_db.task_meta``;
		afterwards one document is written to ``cleaning_stats`` in the user
		profiles db.

		Returns:
			dict ``{'total': <sum of old task id counts over all users>}``.
		"""
		from webscraper.entities import User

		active_users = User.get_active_users()

		#mongodb can have this limit on IN case
		MAX_IN_LIMIT = 4000000

		#running sum of handled task ids
		grand_total = 0

		#celery keeps its own task_meta collection
		celery_db = CustomAppSettings.get_mongo().celery_db

		for user in active_users:
			#old task ids, capped to what a single $in may carry
			stale_ids = user.get_old_task_ids()[:MAX_IN_LIMIT]
			stale_count = len(stale_ids)
			grand_total += stale_count

			user_db = user.get_user_db()

			#same id list is purged from the three collections
			user_db.task_meta.remove({'_id': {'$in': stale_ids}})
			user_db.urls.remove({'task_id':{'$in': stale_ids}})
			celery_db.task_meta.remove({'_id':{'$in': stale_ids}})

			#record the clean-up in the profiles db
			stats_collection = user.get_user_profiles_db().cleaning_stats
			finished_at = datetime.datetime.utcnow()
			stats_collection.insert({
				'user': user.get_user_id(),
				'date_done': finished_at,
				'total_removed': stale_count,
			})

		return {'total': grand_total}
Пример #16
0
	def get_queryset_csv(self):
		"""gets data from db, dumps it into a csv stream and returns the stream with a file name"""

		query = self.create_query_doc()

		#open the session user's database
		client = CustomAppSettings.get_mongo()
		session_user = self.request.session['user']
		url_collection = client[session_user['db']].urls

		#newest documents first
		cursor = url_collection.find(query).sort([('date_scraped', -1)])

		#file name carries both ends of the exported date range
		stamp_format = '%Y-%m-%d %H:%M:%S'
		date_from = self.d_from.strftime(stamp_format)
		date_to = self.d_to.strftime(stamp_format)
		fname = 'export_{0}_{1}.csv'.format(date_from, date_to)

		#the cursor itself is handed to the stream builder
		csvstream = self.create_csv_stream(cursor)

		return csvstream, fname
Пример #17
0
	def get_queryset_excel(self):
		"""gets data from db, dumps into excel stream and returns the stream

		All matching documents of the session user's ``urls`` collection are
		loaded (newest ``date_scraped`` first) before the cursor and client
		are closed; the list is then passed to ``create_excel_stream``.
		"""

		query = self.create_query_doc()

		#get data
		cl = CustomAppSettings.get_mongo()
		res = None

		try:
			user = self.request.session['user']

			db = cl[user['db']]
			res = db.urls.find(query).sort([('date_scraped', -1)])
			queryset = list(res)
		finally:
			#release cursor and client even when the query fails
			if res is not None:
				res.close()
			cl.close()

		#create excel stream
		excelstream = self.create_excel_stream(queryset)

		return excelstream
Пример #18
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the user console.

		Stores the session user on ``self.user``, loads the user statistics
		via ``_get_user_info()`` and adds a JSON copy of
		``info.requests_per_day`` next to the original value.
		"""
		context = super(UserConsoleView, self).get_context_data(**kwargs)

		#current user, also kept on the view
		session_user = self.request.session['user']
		self.user = session_user

		#user statistics plus a JSON rendering of the per-day requests
		userinfo = self._get_user_info()
		requests_json = json.dumps(userinfo['info']['requests_per_day'], default=json_util.default)
		userinfo['info']['requests_per_day_json'] = requests_json

		context['userinfo'] = userinfo

		context['common'] = {
			'title': 'User console',
			'current_menu_item': 4,
			'user': session_user,
			'version': CustomAppSettings.get_version(),
		}

		return context
Пример #19
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the scrape (new job) page.

		Combines the job form settings, the scheduling/points flags reported
		by the user object from ``get_user()`` and the shared ``common`` dict.
		"""
		context = super(ScrapeView, self).get_context_data(**kwargs)

		#session user goes into the common block
		session_user = self.request.session['user']

		#user object answers the scheduling / points questions
		account = self.get_user()
		chosen_freq, all_freqs = Frequency().get_context_friendly_frequencies()

		context.update({
			'is_new': True,
			'can_schedule': account.can_schedule_urls(),
			'selected_frequency': chosen_freq,
			'frequencies': all_freqs,
			'job_form_action': './jobs/',
			'has_points': account.has_points(),
			'is_active': True,
		})

		context['common'] = {
			'title': 'Webscraper',
			'current_menu_item': 0,
			'user': session_user,
			'version': CustomAppSettings.get_version(),
		}
		return context
	def _get_db_connection(self):
		"""Return whatever ``CustomAppSettings.get_mongo()`` provides."""
		connection = CustomAppSettings.get_mongo()
		return connection
	def _db_write_res(self, **kwargs):
		"""write result into db and return the id of the inserted document.

		Keyword arguments read: ``task_id``, ``url``, ``group_name``,
		``domain``, ``query`` (dict of url query parameters), ``tot_res`` and
		``results`` (list of result dicts, each with a ``type`` key). Other
		keyword arguments are ignored.

		Each result dict is modified in place: its position in the list is
		stored under the ``'#'`` key.

		doc structure: {
			task_id: ''
			date_scraped: date,
			url: '',
			group_name: '',
			domain:'',
			query:{q:'', start:0, num:0},
			estimated_res: 0,
			scheduled: false,
			results: [{
				type: '',
				title: '',
				title_url: '',
				tld:'',
				content: '',
				content_time: '',
				related_links: ['','',''...]
				}, {...}],
			typed_results:{
				url: [res_id, res_id, res_id...],
				...}
			}"""

		#instantiate mongodb client
		client = CustomAppSettings.get_mongo()

		#connect to db
		db = client[self.user['db']]

		#get collection
		urls = db.urls

		#create query object: only q / num / start are stored, other url
		#parameters are dropped
		db_query = {"q":'', 'num':0, 'start':0}
		url_query = kwargs['query']
		for k in url_query:
			if k in db_query:
				db_query[k] = url_query[k]

		#create doc
		doc = {
			"task_id": kwargs['task_id'],
			"date_scraped": datetime.datetime.utcnow(),
			"url": kwargs['url'],
			"group_name": kwargs['group_name'],
			"domain": kwargs['domain'],
			"query": db_query,
			"estimated_res": kwargs['tot_res'],
			"results": kwargs['results'],
			"typed_results": {},
			"scheduled": self.is_scheduled
			}

		#insert typed_results into doc: result type -> list of result indexes
		typed = doc['typed_results']
		for ind, el in enumerate(kwargs['results']):
			el['#'] = ind
			typed.setdefault(el['type'], []).append(ind)

		#insert doc
		doc_id = urls.insert(doc)
		return doc_id
	def _give_points_back(self, reduction):
		"""Add ``reduction`` points to this user's profile and return the ``update`` result."""
		profiles_db = CustomAppSettings.get_mongo().webscr_profiles

		selector = {'_id': ObjectId(self.user['_id'])}
		refund = {'$inc': {'points': reduction}}

		# NOTE(review): `new` looks borrowed from find_and_modify (compare
		# _reduce_points) - confirm update() is really meant to receive it
		return profiles_db.users.update(spec=selector, document=refund, new=True)
	def _reduce_points(self, reduction):
		"""Subtract ``reduction`` points from this user's profile.

		Returns the result of ``find_and_modify`` called with ``new=True``.
		"""
		profiles_db = CustomAppSettings.get_mongo().webscr_profiles

		selector = {'_id': ObjectId(self.user['_id'])}
		charge = {'$inc': {'points': -reduction}}

		return profiles_db.users.find_and_modify(query=selector, update=charge, new=True)
	def scrape(self, url, return_data):
		"""Scrape one search url through the proxy and store the parsed results.

		The url is normalised (utf-8 encoded, ``http://`` added when no scheme
		is present) and its query string is parsed. A request is only sent
		when the first label of the url's public-suffix domain is "google";
		non-empty responses are parsed and written with ``_db_write_res``.

		Args:
			url: the url to scrape; an empty url is handed to
				``_wrong_param_exception``.
			return_data: when truthy, the scraped results are included in the
				return value as base64-encoded JSON.

		Returns:
			dict with ``url`` and ``group_name``; when ``return_data`` is
			truthy also ``domain`` and ``b64_json``.
		"""
		current_task = self.current_task
		logger = self.logger

		url = url.encode('utf-8')

		#change status
		current_task.update_state(state=u'STARTED', meta={'url': url, 'group': self.group_name})

		logger.info('TASK EXECUTING: %r, args: %r kwargs: %r' % ( self.current_task.request.id,
			self.current_task.request.args, self.current_task.request.kwargs))

		#avoid doing anything if url is empty
		if len(url) == 0:
			self._wrong_param_exception(url)

		#parse url
		u = urlparse.urlparse(url)

		#add scheme if missing and parse again so netloc/query are filled in
		if u.scheme == '':
			url = 'http://' + url
			u = urlparse.urlparse(url)

		#get netloc
		netloc = u.netloc

		#get parsed query
		qs = urlparse.parse_qs(u.query)

		#paging offset; a non-numeric value falls back to 0 instead of
		#aborting the task (the loop below tolerates such values as well)
		try:
			start = int(qs['start'][0]) if 'start' in qs else 0
		except ValueError:
			start = 0

		#convert qs elements from array to normal strings
		for k, values in list(qs.items()):
			el = ' '.join(values)

			#try to convert number strings into numbers
			if k.lower() in ('num', 'start'):
				el = el.replace(' ', '')
				try:
					el = int(el)
				except ValueError:
					#not a number: keep the raw string
					pass

			qs[k] = el

		#add default values for num and start if there are none
		if 'num' not in qs:
			qs['num'] = 10
		if 'start' not in qs:
			qs['start'] = 0

		#get domain name of the query
		psl = PublicSuffixList()
		query_domain = psl.get_public_suffix(netloc)

		#check if it is google
		parts = query_domain.split(u'.')

		#stays '' when no request is made or the response is empty
		scraped_docs = ''
		if len(parts) > 0 and parts[0].upper() == u'GOOGLE':
			current_task.update_state(state=u'CONNECTING', meta={'url': url, 'group': self.group_name})

			#create request
			req = {
				'url': urllib.quote_plus(url, "%/:=&?~#+!$,;'@()*[]"),
				'referer': u'http://google.com',
				'useragent': u'Webscraper/' + CustomAppSettings.get_version() + ' (+http://www.yriver.it/webscraper/)',
				'region': u'gb',
				'priority': u'1'
			}

			#define which scrape key to take
			scrape_key = get_scrape_key_special if self.plan['is_special'] else get_scrape_key

			#make query
			query = {'key': scrape_key(), 'request': req}

			p = ProxyConnection(self.user, self.plan)
			html = p.send_request(query)

			#parse html; an empty body or the literal '0' means nothing to parse
			if len(html) > 0 and html != '0':
				scraped_docs, tot_res_est = self._parse_html(html)
				#write into db
				self._db_write_res(task_id=current_task.request.id, url=url, group_name=self.group_name, results=scraped_docs, tot_res=tot_res_est, start=start,
						query=qs, domain=query_domain)

		# Convert to Base64 if return_data = True
		if return_data:
			encoded_result = base64.standard_b64encode(json.dumps(scraped_docs))
			return {'url': url, 'group_name': self.group_name, 'domain': query_domain, 'b64_json': encoded_result}
		else:
			return {'url': url, 'group_name': self.group_name}
	def __init__(self, user, plan):
		"""Remember the user and plan and open the user's database.

		Also sets the default socket timeout to 30 seconds via
		``socket.setdefaulttimeout``.
		"""
		socket.setdefaulttimeout(30)

		#database named by the user's 'db' entry
		client = CustomAppSettings.get_mongo()
		self._db = client[user['db']]

		self.plan = plan
		self.user = user
Пример #26
0
	def query(self, tld):
		"""Return the ranks stored for ``tld`` in the Vissbl user's stats object."""
		owner = self.get_user(CustomAppSettings.get_vissbl_user_id())
		return User(owner).get_user_stats_object().get_ranks_for_tld(tld)
Пример #27
0
	def get_mongo(self):
		"""Return the cached client in ``self.cl``, fetching it on first use."""
		cached = self.cl
		if cached:
			return cached

		#nothing cached yet: ask the application settings and remember it
		self.cl = CustomAppSettings.get_mongo()
		return self.cl
Пример #28
0
	def create_user(alias='', plan_name='basic', email='', facebook_user=None, *args, **kwargs):
		"""Create a new user profile and prepare the indexes of its database.

		Args:
			alias: display name; replaced by the Facebook name when
				``facebook_user`` is given.
			plan_name: name of the plan document to attach to the user.
			email: contact email; validated and checked for uniqueness among
				profiles with role 'user' (only when no ``facebook_user`` is
				given).
			facebook_user: optional dict with ``name``, ``email`` and ``id``;
				when non-empty it takes priority over ``alias`` and ``email``.
			*args, **kwargs: accepted but not used.

		Returns:
			``{'profile_id': <inserted id>}`` on success, otherwise
			``{'error': <message>}``.
		"""
		db = User({}).get_user_profiles_db()

		fb_id = ''

		# If Facebook user is supplied, use it as the highest priority
		if facebook_user:
			alias = facebook_user.get('name', '').strip()
			email = facebook_user.get('email', '').strip()
			fb_id = facebook_user.get('id', '').strip()

		elif email:
			email = email.strip()
			if not User({})._validate_email(email):
				return {'error': 'Invalid email'}

			# Make sure email does not exist
			if db.users.find_one({'contacts.email': email.lower(), 'role': 'user'}):
				return {'error': 'Email already occupied'}

		# Resolve the plan before touching the counter so that an unknown
		# plan neither raises nor uses up a db index
		plan = db.plans.find_one({'name': plan_name})
		if plan is None:
			return {'error': 'Unknown plan'}

		# ---- Green light for creating new user

		# Get db index
		db_ind = db.counters.find_and_modify(
			query={'_id': 'dbindex'},
			update={'$inc': {'seq': 1}},
			upsert=True,
			new=True
		)['seq']
		db_ind = int(db_ind)

		# Get new secret key
		secret_key = CustomAppSettings.get_new_secret_key()

		# Get time
		d = datetime.datetime.utcnow()

		# Create doc
		doc = {
			'alias': alias.strip(),
			'secret_key': secret_key.lower(),
			'db_ind': db_ind,
			'role': 'user',
			'is_active': 0,
			'created': d,
			'actdeact': d,
			'last_access': None,
			'plan': {'id': plan['_id'], 'date': d},
			'plans_history': [],
			'contacts': {'email': email.lower(), 'cell': ''},
			'points': 100,
			'facebook_id': fb_id
		}

		# Try to write
		userid = db.users.insert(doc)

		# Create indexes on user db; the direction is given as a
		# (key, direction) pair because a second positional argument of
		# ensure_index is not a direction
		cl = User({}).get_mongo()
		user_db = cl['webscr_' + str(db_ind)]
		user_db.urls.ensure_index([('date_scraped', 1)])
		user_db.task_meta.ensure_index([('date_done', 1)])

		return {'profile_id': userid}