def main():
    """Classify a fixed window of candidates into job categories and dump the result.

    Reads candidates from ``db["candidate"]`` one page at a time, tests each
    candidate's ``job_skill_names`` against the ``Skills`` of every entry
    returned by ``getCharacteristicMap()`` using ``matchSkills`` with a 0.75
    threshold, and collects the matching candidate ids per category key.
    The collected map is then written to the module-level ``text_file`` as a
    bracketed, comma-separated sequence of ``ClassifierObject(...).toJson()``
    strings, after which ``text_file`` is closed.

    Any exception is handed to ``DebugException`` and the elapsed time is
    printed; nothing is re-raised.

    NOTE(review): a second ``main`` is defined later in this file and rebinds
    the name, so this version is presumably not the one that runs - confirm
    whether it is still needed.
    """
    print("GO")
    start_time = time.time()
    document_map = {}
    # Mirrors document_map with one set per category so duplicate checks are
    # O(1) instead of a linear scan of an ever-growing list.
    # Assumes candidate ids are hashable (int/str/ObjectId) - TODO confirm.
    seen_ids = {}
    try:
        cand_table = db["candidate"]
        characteristic_list = getCharacteristicMap()

        print("%s" % len(characteristic_list))
        print("---query Time: %s seconds ---" % (time.time() - start_time))
        print("done")

        # The collection size is only reported; the scan itself is limited to
        # the fixed offset window [start_delta, total_cand).
        print("%s" % cand_table.count())
        start_delta = 200000
        total_cand = 300000
        page_size = 2500
        skip_amount = 0
        cand_count = 0

        while (start_delta + skip_amount) < total_cand:
            # Materialise the page up front so the cursor is exhausted before
            # the (slow) per-candidate matching starts.
            # NOTE(review): skip/limit paging without an explicit sort -
            # confirm the backend returns a stable order across pages.
            candidate_list = list(
                cand_table.find({}, {"candidate_id": 1, "job_skill_names": 1})
                .skip(start_delta + skip_amount)
                .limit(page_size)
            )

            for candidate in candidate_list:
                cand_count += 1
                cand_cat_count = 0
                print("Running candidate - %s" % cand_count)
                for key, record in characteristic_list.items():
                    if not matchSkills(candidate["job_skill_names"], record["Skills"], .75):
                        continue
                    cand_cat_count += 1
                    members = document_map.setdefault(key, [])
                    seen = seen_ids.setdefault(key, set())
                    candidate_id = candidate["candidate_id"]
                    if candidate_id not in seen:
                        seen.add(candidate_id)
                        members.append(candidate_id)
                print("Added to %s categories" % cand_cat_count)

            skip_amount += page_size

        # Emit the records as "[rec,rec,...]"; the separator goes before every
        # record except the first so no trailing comma is produced.
        text_file.write("[")
        for index, (key, val) in enumerate(document_map.items()):
            if index:
                text_file.write(",")
            record = ClassifierObject(key, val)
            recordJson = record.toJson()
            text_file.write("%s" % recordJson)

        text_file.write("]")
        text_file.close()

        print("---Parse Time: %s seconds ---" % (time.time() - start_time))

    except Exception as e:
        # Top-level boundary of the script: report and stop, do not re-raise.
        DebugException(e)
        print("---Run Time: %s seconds ---" % (time.time() - start_time))
def main():
    """Classify candidates into job categories by parsed-resume skills and dump the result.

    Reads candidates from ``db["candidate_skills_from_parsed_resumes"]`` one
    page at a time, lower-cases the ``word`` of every entry in each
    candidate's ``parsedWords``, and tests that skill list against the
    ``Skills`` of every entry returned by ``getCharacteristicMap()`` using
    ``matchSkills`` with the module-level ``Threshold``. Matching candidate
    ids are collected per category key. Pages are fetched while the page
    offset is below half of the collection's count.

    The collected map is then written to the module-level ``text_file`` as a
    bracketed, comma-separated sequence of ``ClassifierObject(...).toJson()``
    strings, after which ``text_file`` is closed.

    Any exception is handed to ``DebugException`` and the elapsed time is
    printed; nothing is re-raised.
    """
    print("GO")
    start_time = time.time()
    document_map = {}
    # Mirrors document_map with one set per category so duplicate checks are
    # O(1) instead of a linear scan of an ever-growing list.
    # Assumes candidate ids are hashable (int/str/ObjectId) - TODO confirm.
    seen_ids = {}
    try:
        cand_table = db["candidate_skills_from_parsed_resumes"]
        characteristic_list = getCharacteristicMap()
        print("Total Number of Job Categories - %s" % len(characteristic_list))
        print("---Job Categories Retrieval Time: %s seconds ---" % (time.time() - start_time))

        # Only offsets below half of the collection's size start a new page.
        total_cand = cand_table.count() / 2
        start_delta = 0
        page_size = 5000
        skip_amount = 0
        cand_count = 0

        while (start_delta + skip_amount) < total_cand:
            # Materialise the page up front so the cursor is exhausted before
            # the (slow) per-candidate matching starts.
            # NOTE(review): skip/limit paging without an explicit sort -
            # confirm the backend returns a stable order across pages.
            candidate_list = list(
                cand_table.find({}, {"candidate_id": 1, "parsedWords": 1})
                .skip(start_delta + skip_amount)
                .limit(page_size)
            )
            for candidate in candidate_list:
                cand_count += 1
                cand_cat_count = 0
                candidate_skill_list = [
                    words["word"].lower() for words in candidate["parsedWords"]
                ]

                # Progress heartbeat every 1000 candidates.
                if cand_count % 1000 == 0:
                    print("Running candidate - %s - Time Taken So Far - %s" % (cand_count, (time.time() - start_time)))

                for key, record in characteristic_list.items():
                    if not matchSkills(candidate_skill_list, record["Skills"], Threshold):
                        continue
                    cand_cat_count += 1
                    members = document_map.setdefault(key, [])
                    seen = seen_ids.setdefault(key, set())
                    candidate_id = candidate["candidate_id"]
                    if candidate_id not in seen:
                        seen.add(candidate_id)
                        members.append(candidate_id)

            skip_amount += page_size

        # Emit the records as "[rec,rec,...]"; the separator goes before every
        # record except the first so no trailing comma is produced.
        text_file.write("[")
        for index, (key, val) in enumerate(document_map.items()):
            if index:
                text_file.write(",")
            record = ClassifierObject(key, val)
            recordJson = record.toJson()
            text_file.write("%s" % recordJson)

        text_file.write("]")
        text_file.close()

        print("---Parse Time: %s seconds ---" % (time.time() - start_time))

    except Exception as e:
        # Top-level boundary of the script: report and stop, do not re-raise.
        DebugException(e)
        print("---Run Time: %s seconds ---" % (time.time() - start_time))