示例#1
0
def find_user_by_id(user_id):
    '''
    (str) --> dict
    Returns a user's info given their id.

    Queries db.users for the document whose "_id" equals user_id and
    returns that document.

    Raises TypeError if user_id is not a str.
    Raises ValueError if no matching user document is found.
    '''
    # Validate first so bad input fails before a database handle is requested.
    if not isinstance(user_id, str):
        raise TypeError(
            bcolors.FAIL +
            "Invalid Input. Enter a valid user id as a string to use this function"
            + bcolors.ENDC)

    db = database()
    data = db.users.find_one({"_id": user_id})
    if not data:
        raise ValueError(bcolors.FAIL + "Invalid User ID" + bcolors.ENDC)
    return data
示例#2
0
def find_user_by_email(email):
    '''
    (str) --> dict
    Returns a user's info given their email.

    Queries db.users for a document whose "emails.address" equals email
    and returns that document.

    Raises TypeError if email is not a str.
    Raises ValueError if no user document has that address.
    '''
    # Validate first so bad input fails before a database handle is requested.
    if not isinstance(email, str):
        raise TypeError(
            bcolors.FAIL +
            "Invalid Input. Enter a valid email as a string to use this function"
            + bcolors.ENDC)

    db = database()
    data = db.users.find_one({"emails.address": email})
    if not data:
        raise ValueError(bcolors.FAIL + "No user found with that email" +
                         bcolors.ENDC)
    return data
示例#3
0
def find_user_by_name(name):
	'''
	(str) --> list
	Returns a list of users info given a name.

	The lower-cased name is split into words, which are compared (as
	case-insensitive substrings) with profile.firstName and profile.lastName
	of every document in db.users:
	  - one word:   the name occurs in the first name or in the last name
	  - two words:  the first word occurs in the first name AND the second
	                word occurs in the last name
	  - more words: any of the words occurs in the first or the last name
	Every matching user is returned once, in the order the database yields
	them. A user without a profile or without a name field is treated as
	having an empty name. Each match is also printed to stdout.

	Raises TypeError if name is not a str.
	Raises ValueError if no user matches.
	'''
	# Validate first so bad input fails before a database handle is requested.
	if not isinstance(name, str):
		raise TypeError(bcolors.FAIL + "Invalid Input. Enter a valid name as a string to use this function" + bcolors.ENDC)
	name = name.lower()

	db = database()

	# separates string into array of words
	words = re.findall(r'\w+', name)

	user_list = []
	for user in db.users.find():
		profile = user.get("profile") or {}
		first_name = (profile.get("firstName") or "").lower()
		last_name = (profile.get("lastName") or "").lower()

		# changes search parameters based on number of words in search
		if len(words) == 1:
			matched = name in first_name or name in last_name
		elif len(words) == 2:
			matched = words[0] in first_name and words[1] in last_name
		elif len(words) > 2:
			# any() stops at the first hit, so a user matching several
			# words is still listed only once
			matched = any(word in first_name or word in last_name for word in words)
		else:
			# no searchable word in the input
			matched = False

		if matched:
			user_list.append(user)

	if not user_list:
		raise ValueError(bcolors.FAIL + "Name not found in database" + bcolors.ENDC)

	for user in user_list:
		print(user)
		print("")
	return user_list
示例#4
0
def parse_user_site(user_id):
    '''
    (str) --> bool
    Parses a user's site given a user id.

    Looks the user up in db.users by "_id", reads the site url from
    profile.url, writes the page title and a "last parsed" timestamp back
    to db.users, and then calls increment_word() for the keywords returned
    by get_html() and get_pdf() for that url. Progress and per-keyword
    failures are printed to stdout.

    A TypeError is raised inside the outer try when user_id is neither str
    nor unicode, and a ValueError when no user has that id.

    NOTE(review): the outer try block has no except/finally clause and
    there is no return statement in the visible text, so this definition
    appears to be cut off here; how the errors above are handled and the
    documented bool return cannot be confirmed from this excerpt.
    '''
    try:
        # NOTE(review): start_time is not read anywhere in the visible part.
        start_time = time.time()
        db = database()
        if type(user_id) != str and type(user_id) != unicode:
            raise TypeError
        user_id = str(user_id)

        user = db.users.find_one({"_id": user_id})
        if user:
            url_temp = user.get("profile").get("url")
        else:
            raise ValueError

        print bcolors.OKGREEN + ("parsing through: " + url_temp) + bcolors.ENDC
        # user_id is rebound to the stored "_id"; it is not read again in the
        # visible part of the function.
        user_id = user.get("_id")

        title = get_title(url_temp)
        # "%c" is the locale's date and time representation
        last_parsed = time.strftime("%c")

        # The filter is the url, not the user id.
        # NOTE(review): the trailing positional False, True look like the
        # mongo shell's (upsert, multi); verify they mean the same thing in
        # the driver's update() signature.
        db.users.update({"profile.url": url_temp}, {
            "$set": {
                "profile.title": title,
                "profile.lastParsed": last_parsed
            }
        }, False, True)

        # there will be 2 types of tags,
        # from website, and from pdf
        tags_temp = get_html(url_temp)  # this is all words from the html
        tagsPDF_temp = get_pdf(url_temp)  # this is all words from the pdf
        # html keywords that were incremented successfully
        keyword_list = []
        for keyword in tags_temp:
            try:
                print bcolors.OKGREEN + (
                    "Incremented: %s to %d" %
                    (keyword, increment_word(keyword))) + bcolors.ENDC
                keyword_list.append(keyword)
            except Exception, e:
                # a keyword that cannot be incremented is reported and skipped
                print bcolors.FAIL + "Invalid Entry" + bcolors.ENDC
                print e

        for keyword in tagsPDF_temp:
            try:
                # pdf keywords already counted from the html are not counted again
                if keyword not in keyword_list:
                    print bcolors.OKGREEN + (
                        "Incremented: %s to %d" %
                        (keyword, increment_word(keyword))) + bcolors.ENDC

            except Exception, e:
                # a keyword that cannot be incremented is reported and skipped
                print bcolors.FAIL + "Invalid Entry" + bcolors.ENDC
                print e
示例#5
0
def count_total_words():
	"""
	() --> integer

	Adds up the "total" field of every word_count entry whose total is greater than 1, and returns that sum.
	"""
	repeated_words = database().word_count.find({"total" : {'$gt' : 1} })
	return sum(entry.get("total") for entry in repeated_words)
示例#6
0
def count_distinct_words():
	"""
	() --> integer

	Returns how many word_count entries have a total greater than 1, i.e. the number of distinct words that appeared more than once.
	"""
	repeated_words = database().word_count.find({"total" : {'$gt' : 1} })
	return sum(1 for _ in repeated_words)
示例#7
0
def calculate_keywords():
	"""
	() --> list

	Returns a list of [word, total] pairs for the keywords in the database whose total is
	no more than 0.8416 standard deviations above the mean (bottom 80%), ordered from the
	highest total to the lowest.

	The mean and standard deviation come from _average_count() and _std_count(). Entries
	whose total is not positive are left out, every word is listed at most once, and entries
	with equal totals keep the order in which the database returned them. An empty
	word_count collection yields an empty list. The result is also printed to stdout.
	"""
	db = database()
	data = list(db.word_count.find())

	avg_val = _average_count()
	std_dev = _std_count()

	# keeps only the entries that are not too far above the mean
	kept = []
	for entry in data:
		total = entry.get("total")
		# (mean - total) / std below -0.8416 means the total lies more than
		# 0.8416 standard deviations above the mean
		if (avg_val - total) / std_dev < -0.8416:
			continue
		if total > 0:
			kept.append(entry)

	# list.sort is stable even with reverse=True, so ties stay in database order
	kept.sort(key=lambda entry: entry.get("total"), reverse=True)

	sorted_list = []
	seen_words = set()
	for entry in kept:
		word = entry.get("word")
		# a word stored in more than one document is reported once, with its highest total
		if word in seen_words:
			continue
		seen_words.add(word)
		sorted_list.append([word, entry.get("total")])

	print(sorted_list)
	return sorted_list
示例#8
0
def get_all_urls():
    """
    () --> list

    Collects profile.url from every document in db.users and returns the
    non-empty ones as a list.
    """
    users = database().users.find()
    candidate_urls = (user.get("profile").get("url") for user in users)
    return [url for url in candidate_urls if url]
示例#9
0
def get_all_urls():
	"""
	() --> list

	Walks over every document in db.users and returns the list of profile urls that are set.
	"""
	submitted = []
	for user in database().users.find():
		url = user.get("profile").get("url")
		if not url:
			# users without a url are left out
			continue
		submitted.append(url)
	return submitted
示例#10
0
def parse_user_site(user_id):
	'''
	(str) --> bool
	Parses a user's site given a user id.

	Looks the user up in db.users by "_id", reads the site url from
	profile.url, writes the page title and a "last parsed" timestamp back
	to db.users, and then calls increment_word() for the keywords returned
	by get_html() and get_pdf() for that url. Progress and per-keyword
	failures are printed to stdout.

	A TypeError is raised inside the outer try when user_id is neither str
	nor unicode, and a ValueError when no user has that id.

	NOTE(review): the outer try block has no except/finally clause and
	there is no return statement in the visible text, so this definition
	appears to be cut off here; how the errors above are handled and the
	documented bool return cannot be confirmed from this excerpt.
	'''
	try:
		# NOTE(review): start_time is not read anywhere in the visible part.
		start_time = time.time()
		db = database()
		if type(user_id) != str and type(user_id) != unicode:
			raise TypeError
		user_id = str(user_id)
		
		user = db.users.find_one({"_id" : user_id})
		if user:
 			url_temp = user.get("profile").get("url")
		else:
			raise ValueError

		print bcolors.OKGREEN + ("parsing through: " + url_temp) + bcolors.ENDC
		# user_id is rebound to the stored "_id"; it is not read again in the
		# visible part of the function.
		user_id = user.get("_id")
		

		title = get_title(url_temp)
		# "%c" is the locale's date and time representation
		last_parsed = time.strftime("%c")

		# The filter is the url, not the user id.
		# NOTE(review): the trailing positional False,True look like the
		# mongo shell's (upsert, multi); verify they mean the same thing in
		# the driver's update() signature.
		db.users.update({"profile.url" : url_temp}, { "$set": {"profile.title" : title, "profile.lastParsed" : last_parsed}},False,True)

		# there will be 2 types of tags,
		# from website, and from pdf
		tags_temp = get_html(url_temp) # this is all words from the html
		tagsPDF_temp = get_pdf(url_temp) # this is all words from the pdf
		# html keywords that were incremented successfully
		keyword_list = []
		for keyword in tags_temp:
			try:
				print bcolors.OKGREEN + ("Incremented: %s to %d" % (keyword, increment_word(keyword))) + bcolors.ENDC
				keyword_list.append(keyword)
			except Exception, e:
				# a keyword that cannot be incremented is reported and skipped
				print bcolors.FAIL + "Invalid Entry" + bcolors.ENDC
				print e

		for keyword in tagsPDF_temp:
			try:
				# pdf keywords already counted from the html are not counted again
				if keyword not in keyword_list:
					print bcolors.OKGREEN + ("Incremented: %s to %d" % (keyword, increment_word(keyword))) + bcolors.ENDC

			except Exception, e:
				# a keyword that cannot be incremented is reported and skipped
				print bcolors.FAIL + "Invalid Entry" + bcolors.ENDC
				print e
示例#11
0
def _std_count():
	"""
	() --> number

	Computes numpy.std over the "total" values of the word_count entries, prints it and returns it.
	Entries whose total is missing or zero are not included.
	"""
	totals = []
	for entry in database().word_count.find():
		total = entry.get("total")
		if total:
			totals.append(total)

	deviation = numpy.std(totals)
	print("Standard Deviation: " + str(deviation))
	return deviation
示例#12
0
def _insert_word(word):
	"""
	(string) --> boolean

	Stores a new word in word_count with a total of zero and returns True.
	Raises LookupError when an entry for the word already exists.
	"""
	db = database()

	already_stored = db.word_count.find_one({"word" : word})
	if already_stored:
		raise LookupError("Word: %s already exists in database" % word)

	db.word_count.insert({"word": word, "total": 0})
	return True
示例#13
0
def delete_all_keywords():
    '''
    () --> bool
    Drops the keywords_coll and word_count collections.

    Returns True when both drops succeed. Any exception is printed and
    False is returned instead.
    '''
    try:
        store = database()

        # deletes existing data, one collection after the other
        for collection_name in ("keywords_coll", "word_count"):
            getattr(store, collection_name).drop()
        print("All Entries Deleted")
    except Exception as error:
        print(error)
        return False
    return True
示例#14
0
def delete_all_keywords():
	'''
	() --> bool
	Drops the keywords_coll and word_count collections.

	Returns True when both drops succeed. Any exception is printed and
	False is returned instead.
	'''
	succeeded = True
	try:
		store = database()

		# deletes existing data
		store.keywords_coll.drop()
		store.word_count.drop()
		print("All Entries Deleted")
	except Exception as error:
		print(error)
		succeeded = False
	return succeeded
示例#15
0
def parse_all_users():
	'''
	() --> bool
	Calls parse_user_site() with the "_id" of every document in db.users.

	A failure while parsing one user is printed and the loop moves on to
	the next user. Returns True once the loop has finished, or False (after
	printing the error) when reading the users themselves fails.
	'''
	try:
		every_user = database().users.find()
		for user in every_user:
			try:
				parse_user_site(user.get("_id"))
			except Exception as site_error:
				# one failing site does not stop the remaining users
				print(site_error)
	except Exception as db_error:
		print(db_error)
		return False
	return True
示例#16
0
def parse_all_users():
    '''
    () --> bool
    Calls parse_user_site() with the "_id" of every document in db.users.

    A failure while parsing one user is printed and the loop moves on to
    the next user. Returns True once the loop has finished, or False (after
    printing the error) when reading the users themselves fails.
    '''
    finished = False
    try:
        store = database()
        for user in store.users.find():
            try:
                parse_user_site(user.get("_id"))
            except Exception as site_error:
                # one failing site does not stop the remaining users
                print(site_error)
        finished = True
    except Exception as db_error:
        print(db_error)
    return finished
示例#17
0
def increment_word(word):
	"""
	(string) --> integer

	Adds one to the stored total of a word and returns the new total. A word that is not in
	the database yet is first inserted through _insert_word() (with a total of 0), so its
	first increment returns 1.

	Returns None if the word still cannot be found after it was inserted.
	"""
	db = database()
	word = str(word)
	data = db.word_count.find_one({"word" : word})

	if not data:
		# Only a word that was just inserted has to be read back; an existing
		# entry is already in hand and needs no second query.
		_insert_word(word)
		data = db.word_count.find_one({"word" : word})
		if not data:
			return None

	count = data.get("total") + 1
	db.word_count.update({ "word": word}, {"word": word, "total": count})
	return count
示例#18
0
def delete_user_keywords(user_id):
    '''
    (str) --> bool
    Deletes all the keywords of a user, given an id.

    Checks that a document with this "_id" exists in db.users, then removes
    every document in db.keywords_coll whose "user_id" equals it. Prints a
    confirmation and returns True.

    Raises TypeError if user_id is neither str nor unicode.
    Raises ValueError if no user has that id.
    '''
    # Validate first so bad input fails before a database handle is requested.
    if not (isinstance(user_id, str) or isinstance(user_id, unicode)):
        raise TypeError(bcolors.FAIL + "Invalid ID type" + bcolors.ENDC)

    db = database()
    if not db.users.find_one({"_id": user_id}):
        raise ValueError(bcolors.FAIL + "Invalid User ID" + bcolors.ENDC)

    db.keywords_coll.delete_many({"user_id": user_id})
    print("User Entries Deleted")
    return True
示例#19
0
def find_user_by_id(user_id):
	'''
	(str) --> dict
	Returns a user's info given their id.

	Queries db.users for the document whose "_id" equals user_id and
	returns that document.

	Raises TypeError if user_id is not a str.
	Raises ValueError if no matching user document is found.
	'''
	# Validate first so bad input fails before a database handle is requested.
	if not isinstance(user_id, str):
		raise TypeError(bcolors.FAIL + "Invalid Input. Enter a valid user id as a string to use this function" + bcolors.ENDC)

	db = database()
	data = db.users.find_one({"_id" : user_id})
	if not data:
		raise ValueError(bcolors.FAIL + "Invalid User ID" + bcolors.ENDC)
	return data
示例#20
0
def delete_user_keywords(user_id):
	'''
	(str) --> bool
	Deletes all the keywords of a user, given an id.

	Checks that a document with this "_id" exists in db.users, then removes
	every document in db.keywords_coll whose "user_id" equals it. Prints a
	confirmation and returns True.

	Raises TypeError if user_id is neither str nor unicode.
	Raises ValueError if no user has that id.
	'''
	# Validate first so bad input fails before a database handle is requested.
	if not (isinstance(user_id, str) or isinstance(user_id, unicode)):
		raise TypeError(bcolors.FAIL + "Invalid ID type" + bcolors.ENDC)

	db = database()
	if not db.users.find_one({"_id" : user_id}):
		raise ValueError(bcolors.FAIL + "Invalid User ID" + bcolors.ENDC)

	db.keywords_coll.delete_many({"user_id": user_id})
	print("User Entries Deleted")
	return True
示例#21
0
def find_user_by_email(email):
	'''
	(str) --> dict
	Returns a user's info given their email.

	Queries db.users for a document whose "emails.address" equals email
	and returns that document.

	Raises TypeError if email is not a str.
	Raises ValueError if no user document has that address.
	'''
	# Validate first so bad input fails before a database handle is requested.
	if not isinstance(email, str):
		raise TypeError(bcolors.FAIL +"Invalid Input. Enter a valid email as a string to use this function" + bcolors.ENDC)

	db = database()
	data = db.users.find_one({"emails.address" : email})
	if not data:
		raise ValueError(bcolors.FAIL + "No user found with that email" + bcolors.ENDC)
	return data
示例#22
0
def _find_skills(bodyStr):
	"""
	(str) --> array

	Searches the given text for known skills and returns the ones that were found.

	The skill vocabulary is read from db.skill_coll (the "skill" list of every document).
	If bodyStr contains "skill", only the text from its first occurrence onwards is
	searched; otherwise the whole of bodyStr is searched. Vocabulary entries are compared
	in lower case, so bodyStr is presumably lower-cased by the caller -- TODO confirm.
	The one-letter skills "C" and "R" are reported when " c " / " r " occur in the
	searched text.

	Raises TypeError if bodyStr is neither str nor unicode.
	"""
	if not (isinstance(bodyStr, str) or isinstance(bodyStr, unicode)):
		raise TypeError(bcolors.FAIL + "bodyStr must be a string" + bcolors.ENDC)

	db = database()

	# find words that are matching with our skills vocabulary
	lstSkills = []
	for entry in db.skill_coll.find():
		lstSkills.extend(entry.get("skill"))

	# we will first check if the person has "skill" defined; if so only the
	# text after that definition is searched, otherwise the entire body is
	# (which takes longer).
	marker = bodyStr.find("skill")
	search_text = bodyStr[marker:] if marker != -1 else bodyStr

	# this is special case, when person has C and/or R skills.
	# those skills (one worded) can be found by adding spaces
	# eg. "c" -> " c "
	# Did not add "GO" language skill due to huge problem.
	# Each letter is checked on its own so that finding "C" no longer hides "R".
	short_skills = []
	if " c " in search_text:
		short_skills.append("C")
	if " r " in search_text:
		short_skills.append("R")

	skill_words = []
	for position, raw_skill in enumerate(lstSkills):
		# fix the format as usual
		skill = str(raw_skill.replace('\n', "").replace('\r', ""))

		if skill.lower() in search_text:
			skill_words.append(skill)

		if position == 0:
			# the one-letter skills keep their previous place in the result:
			# right after the first vocabulary entry
			skill_words.extend(short_skills)

	return skill_words
示例#23
0
def find_user_by_name(name):
    '''
    (str) --> list
    Returns a list of users info given a name.

    The lower-cased name is split into words, which are compared (as
    case-insensitive substrings) with profile.firstName and profile.lastName
    of every document in db.users:
      - one word:   the name occurs in the first name or in the last name
      - two words:  the first word occurs in the first name AND the second
                    word occurs in the last name
      - more words: any of the words occurs in the first or the last name
    Every matching user is returned once, in the order the database yields
    them. A user without a profile or without a name field is treated as
    having an empty name. Each match is also printed to stdout.

    Raises TypeError if name is not a str.
    Raises ValueError if no user matches.
    '''
    # Validate first so bad input fails before a database handle is requested.
    if not isinstance(name, str):
        raise TypeError(
            bcolors.FAIL +
            "Invalid Input. Enter a valid name as a string to use this function"
            + bcolors.ENDC)
    name = name.lower()

    db = database()

    # separates string into array of words
    words = re.findall(r'\w+', name)

    user_list = []
    for user in db.users.find():
        profile = user.get("profile") or {}
        first_name = (profile.get("firstName") or "").lower()
        last_name = (profile.get("lastName") or "").lower()

        # changes search parameters based on number of words in search
        if len(words) == 1:
            matched = name in first_name or name in last_name
        elif len(words) == 2:
            matched = words[0] in first_name and words[1] in last_name
        elif len(words) > 2:
            # any() stops at the first hit, so a user matching several
            # words is still listed only once
            matched = any(word in first_name or word in last_name
                          for word in words)
        else:
            # no searchable word in the input
            matched = False

        if matched:
            user_list.append(user)

    if not user_list:
        raise ValueError(bcolors.FAIL + "Name not found in database" +
                         bcolors.ENDC)

    for user in user_list:
        print(user)
        print("")
    return user_list