예제 #1
0
	def get_queryset(self):
		"""Return the newest page of scraped-URL documents for the session user.

		Runs the query built by ``self.create_query_doc()`` against the ``urls``
		collection of the database named by ``session['user']['db']``, sorted by
		``date_scraped`` descending, fetching only the summary fields listed
		in the projection below.

		Side effects:
			Stores the number of matching documents in ``self.total_results``.

		Returns:
			A list holding at most ``self.n`` documents.
		"""
		cl = CustomAppSettings.get_mongo()
		try:
			query = self.create_query_doc()

			# the per-user database name lives in the session's user record
			user = self.request.session['user']
			db = cl[user['db']]

			# only the fields needed for the listing are fetched
			fields = {'date_scraped': 1, 'estimated_res': 1, 'task_id': 1, 'url': 1,
					  'query': 1, 'domain': 1, 'scheduled': 1}
			res = db.urls.find(query, fields).sort([('date_scraped', -1)])
			try:
				# the total is taken before the page limit is applied to the cursor
				self.total_results = res.count()
				queryset = list(res.limit(self.n))
			finally:
				# close the cursor even when counting or iterating failed
				res.close()
		finally:
			# close the client on every path, not only on success
			cl.close()

		return queryset
예제 #2
0
	def get_context_data(self, **kwargs):
		"""Build the template context for the user management console.

		Adds a ``common`` dict (page title, active menu index, session user,
		application version) and a ``profiles`` list with every document of the
		``webscr_profiles.users`` collection. Each profile gets two extra keys:
		``id`` (string form of ``_id``) and ``is_admin`` (role equals 'admin').
		"""
		context = super(AdminConsoleView, self).get_context_data(**kwargs)

		profiles_db = CustomAppSettings.get_mongo().webscr_profiles
		current_user = self.request.session['user']

		# values shared by the page layout
		common = {
			'title': 'User management console',
			'current_menu_title': 4,
			'user': current_user,
		}
		context['common'] = common

		# expose _id under the key 'id' as well, since _id itself can't be used
		listed = []
		for doc in profiles_db.users.find():
			doc['id'] = str(doc['_id'])
			doc['is_admin'] = (doc['role'] == 'admin')
			listed.append(doc)
		context['profiles'] = listed

		common['version'] = CustomAppSettings.get_version()
		return context
예제 #3
0
	def clean(self):
		"""Validate the form and resolve the submitted secret key to a profile.

		When a user document with the given ``secret_key`` exists, its id is
		added to the cleaned data as ``profile_id``. Otherwise an
		"Invalid secret key" error is attached to the ``secret_key`` field and
		that field is removed from the cleaned data.

		Returns the cleaned data dict in every case.
		"""
		data = super(LoginForm, self).clean()

		# earlier validation already reported errors - skip the lookup
		if self._errors:
			return data

		key = data.get('secret_key')

		# look the key up among the stored profiles; only _id is fetched
		users = CustomAppSettings.get_mongo().webscr_profiles.users
		match = users.find_one({'secret_key': key}, {'_id': 1})

		if not match:
			self._errors['secret_key'] = self.error_class([u"Invalid secret key"])
			del data['secret_key']
			return data

		data['profile_id'] = str(match['_id'])
		return data
	def _track_proxy_connection(self, ):
		cl = CustomAppSettings.get_mongo()
		
		db = self._db
		
		d = datetime.datetime.utcnow()
		
		#increase calls count
		db.calls.find_and_modify(
			query = {'date.y':d.year, 'date.m': d.month, 'date.d': d.day, 'date.h': d.hour, 'date.n': d.minute, 'date.s': d.second},
			update = {'$inc': {'count': 1}},
			upsert = True)
예제 #5
0
	def get_redirect_url(self, *args, **kwargs):
		""" Bump the activation counter of the indicated user if current user is Admin

		The update only matches profiles whose role is not 'admin'. It
		increments ``is_active`` by one and stamps ``actdeact`` with the current
		UTC time. Always returns the URL of the user console.
		"""
		profiles_db = CustomAppSettings.get_mongo().webscr_profiles

		requester_is_admin = self.request.session['user']['is_admin']
		if requester_is_admin:
			# admins themselves are excluded from the update
			target = {'_id': ObjectId(kwargs['profile_id']), 'role': {'$ne': 'admin'}}
			change = {
				'$inc': {'is_active': 1},
				'$set': {'actdeact': datetime.datetime.utcnow()},
			}
			profiles_db.users.update(target, change)

		return reverse('webscraper:userconsole')
예제 #6
0
	def get_context_data(self, **kwargs):
		"""Add the stored results of task ``self.taskid`` to the context.

		Looks up one document with that ``task_id`` in the session user's
		``urls`` collection and puts its ``results`` value under
		``context['itms']``. When no document is found, the lookup result
		itself is stored instead.
		"""
		context = super(ExportPreviewTaskView, self).get_context_data(**kwargs)

		client = CustomAppSettings.get_mongo()
		account = self.request.session['user']
		urls = client[account['db']].urls

		# only the results field is fetched
		found = urls.find_one({'task_id': self.taskid}, {'results': 1})

		context['itms'] = found['results'] if found else found
		return context
	def maintain(self):
		"""Remove old task data for every active user and log what was removed.

		For each user from ``User.get_active_users()`` the old task ids are
		removed from the user's ``task_meta`` and ``urls`` collections and from
		``celery_db.task_meta``. A statistics document is then inserted into the
		``cleaning_stats`` collection of the user's profiles db.

		Returns:
			``{'total': n}`` where ``n`` is the number of old task ids handled
			across all users.
		"""
		from webscraper.entities import User

		active_users = User.get_active_users()

		# cap on the number of ids put into a single $in clause
		MAX_IN_LIMIT = 4000000

		# running sum of handled task ids
		handled = 0

		celery_db = CustomAppSettings.get_mongo().celery_db

		for account in active_users:
			stale_ids = account.get_old_task_ids()[:MAX_IN_LIMIT]
			batch_size = len(stale_ids)
			handled += batch_size

			# per-user data: task metadata and scraped urls
			user_db = account.get_user_db()
			user_db.task_meta.remove({'_id': {'$in': stale_ids}})
			user_db.urls.remove({'task_id': {'$in': stale_ids}})

			# celery task metadata
			celery_db.task_meta.remove({'_id': {'$in': stale_ids}})

			# record how many ids were handled for this user
			stats = account.get_user_profiles_db().cleaning_stats
			finished_at = datetime.datetime.utcnow()
			stats.insert({
				'user': account.get_user_id(),
				'date_done': finished_at,
				'total_removed': batch_size,
			})

		return {'total': handled}
예제 #8
0
	def do_edit(self, kwargs):
		"""Set a new alias on a user profile; only admins may do this.

		Args:
			kwargs: dict holding ``profile_id``, the id of the profile to edit.

		Returns:
			'NOT ADMIN' when the session user is not an admin,
			'WRONG ALIAS' when the posted ``alias`` is missing or blank,
			'OK' after the update has been issued.
		"""
		#return early if user is not admin
		if not self.request.session['user']['is_admin']:
			return 'NOT ADMIN'

		#get new alias and id
		alias = self.request.POST.get('alias')
		profile_id = kwargs['profile_id']

		#a missing alias (None) is rejected the same way as a blank one
		if alias is None or not alias.strip():
			return 'WRONG ALIAS'

		#do update; the alias is stored exactly as posted
		cl = CustomAppSettings.get_mongo()
		db = cl.webscr_profiles
		db.users.update({'_id': ObjectId(profile_id)}, {'$set': {'alias': alias}})

		return 'OK'
예제 #9
0
	def get_queryset_csv(self):
		"""Query the user's scraped urls and hand them to the CSV stream builder.

		Returns:
			A ``(csvstream, fname)`` tuple: the value produced by
			``self.create_csv_stream`` for the result cursor, and a file name
			built from ``self.d_from`` and ``self.d_to``.
		"""
		criteria = self.create_query_doc()

		client = CustomAppSettings.get_mongo()
		account = self.request.session['user']
		urls = client[account['db']].urls

		# sorted by date_scraped, descending
		cursor = urls.find(criteria).sort([('date_scraped', -1)])

		# the file name carries both ends of the export period
		stamp = '%Y-%m-%d %H:%M:%S'
		fname = 'export_{0}_{1}.csv'.format(self.d_from.strftime(stamp), self.d_to.strftime(stamp))

		# the cursor is passed on still open; create_csv_stream consumes it
		return self.create_csv_stream(cursor), fname
예제 #10
0
	def get_queryset_excel(self):
		"""Query the user's scraped urls and render them as an excel stream.

		All matching documents are loaded into a list (sorted by
		``date_scraped`` descending). The cursor and client are closed before
		the list is passed to ``self.create_excel_stream``.

		Returns:
			The value produced by ``self.create_excel_stream``.
		"""
		query = self.create_query_doc()

		cl = CustomAppSettings.get_mongo()
		try:
			user = self.request.session['user']

			db = cl[user['db']]
			res = db.urls.find(query).sort([('date_scraped', -1)])
			try:
				queryset = list(res)
			finally:
				# close the cursor even when iteration failed
				res.close()
		finally:
			# close the client on every path, not only on success
			cl.close()

		#create excel stream
		excelstream = self.create_excel_stream(queryset)

		return excelstream
	def _get_db_connection(self):
		"""Return the Mongo client supplied by ``CustomAppSettings.get_mongo()``."""
		client = CustomAppSettings.get_mongo()
		return client
	def _db_write_res(self, **kwargs):
		"""Store one scrape result document in the user's ``urls`` collection.

		Keyword Args:
			task_id: stored as ``task_id``.
			url: stored as ``url``.
			group_name: stored as ``group_name``.
			domain: stored as ``domain``.
			query: mapping of query parameters; only ``q``, ``num`` and
				``start`` are copied into the document.
			tot_res: stored as ``estimated_res``.
			results: list of result dicts, each with a ``type`` key. Every
				element is modified in place: its list position is written
				under the ``'#'`` key.

		Stored document layout::

			{
				task_id: '',
				date_scraped: date,
				url: '',
				group_name: '',
				domain: '',
				query: {q: '', start: 0, num: 0},
				estimated_res: 0,
				scheduled: false,
				results: [{
					type: '',
					title: '',
					title_url: '',
					tld: '',
					content: '',
					content_time: '',
					related_links: ['', '', ''...]
					}, {...}],
				typed_results: {
					<type>: [result index, result index, ...],
					...}
			}

		Returns:
			The value returned by ``urls.insert`` for the new document.
		"""
		#instantiate mongodb client
		client = CustomAppSettings.get_mongo()

		#connect to the user's db and get the collection
		db = client[self.user['db']]
		urls = db.urls

		#create query object: keep only the known parameters, with defaults
		db_query = {"q": '', 'num': 0, 'start': 0}
		url_query = kwargs['query']
		for key in url_query:
			if key in db_query:
				db_query[key] = url_query[key]

		#create doc
		doc = {
			"task_id": kwargs['task_id'],
			"date_scraped": datetime.datetime.utcnow(),
			"url": kwargs['url'],
			"group_name": kwargs['group_name'],
			"domain": kwargs['domain'],
			"query": db_query,
			"estimated_res": kwargs['tot_res'],
			"results": kwargs['results'],
			"typed_results": {},
			"scheduled": self.is_scheduled
			}

		#index results by type: typed_results[type] lists the positions of
		#the results of that type; each result also records its own position
		typed = doc['typed_results']
		for ind, el in enumerate(kwargs['results']):
			el['#'] = ind
			typed.setdefault(el['type'], []).append(ind)

		#insert doc
		doc_id = urls.insert(doc)
		return doc_id
	def _give_points_back(self, reduction):
		"""Add ``reduction`` points to this user's profile document.

		Returns whatever ``users.update`` returns.
		"""
		profiles_db = CustomAppSettings.get_mongo().webscr_profiles

		selector = {'_id': ObjectId(self.user['_id'])}
		refund = {'$inc': {'points': reduction}}

		# NOTE(review): ``new`` is a find_and_modify option (see _reduce_points);
		# confirm it has any meaning when passed to update().
		return profiles_db.users.update(spec=selector, document=refund, new=True)
	def _reduce_points(self, reduction):
		"""Subtract ``reduction`` points from this user's profile document.

		Returns the result of ``find_and_modify`` called with ``new=True``.
		"""
		profiles_db = CustomAppSettings.get_mongo().webscr_profiles

		selector = {'_id': ObjectId(self.user['_id'])}
		charge = {'$inc': {'points': -reduction}}

		return profiles_db.users.find_and_modify(query=selector, update=charge, new=True)
	def __init__(self, user, plan):
		"""Bind the instance to a user's database and a plan.

		Args:
			user: mapping with at least a ``db`` key naming the user's database.
			plan: stored unchanged as ``self.plan``.
		"""
		# default timeout (seconds) for socket objects created from now on
		socket.setdefaulttimeout(30)

		client = CustomAppSettings.get_mongo()
		self._db = client[user['db']]

		self.plan = plan
		self.user = user
예제 #16
0
	def get_mongo(self):
		"""Return the Mongo client cached on ``self.cl``, creating it on first use."""
		client = self.cl
		if client:
			return client

		# nothing cached yet - fetch a client and remember it
		client = CustomAppSettings.get_mongo()
		self.cl = client
		return client