Пример #1
0
 def __init__(self, chat_id):
     """Set up the per-chat task list, greet the chat, and start grading setup."""
     # Build the four tasks for this chat in a fixed order.
     self.tasks = [task_cls(chat_id) for task_cls in (Task1, Task2, Task3, Task4)]
     self.chat_id = chat_id
     # Send the first five comment fragments as one message.
     bot.send_message(chat_id, ''.join(comments[:5]))
     self.choose_grading_system()
Пример #2
0
 def get_100_first_tags(self):
     """Process every *json file under month/month_tokenized.

     NOTE(review): the Task1 and Task3 instances are created and
     immediately discarded — presumably their constructors do the
     file processing; confirm before relying on this.
     """
     for name in os.listdir("month/month_tokenized"):
         # Skip anything that is not a json file.
         if not name.endswith("json"):
             continue
         stem = name[:-5]  # drop the ".json" suffix
         Task1("month/month_tokenized/%s" % name,
               "month/month_tokenized/month_%s" % stem)
         Task3("month/month_not_tokenized/%s" % name,
               "month/month_not_tokenized/month_%s" % stem)
         tfidf = Task2Tfidf("month/month_tokenized/month_%s" % stem)
         tfidf.main()
         print("done %s" % name)
Пример #3
0
 def __solve(self, user_id, show_untagged: bool,
             ignore_private_accounts: bool, use_extended_group_info: bool,
             ignore_empty_friends: bool) -> dict:
     """Run Task1 over the user's friends and return the result as a dict.

     When *ignore_empty_friends* is set, entries whose payload carries a
     'groups' key holding an empty collection are dropped.
     """
     friends = self.__vkApiWrapper.get_friends(user_id)
     solver = Task1(self.__content_analyzers, self.__vkApiWrapper)
     res = solver.solve(friends, show_untagged, ignore_private_accounts,
                        use_extended_group_info)
     if not ignore_empty_friends:
         return dict(res)
     # Keep entries that either lack 'groups' or have a non-empty one.
     return {key: val for key, val in res.items()
             if 'groups' not in val or len(val['groups']) != 0}
Пример #4
0
 def select_task(self, task_num):
     """Instantiate and run the task identified by *task_num*.

     Args:
         task_num: key naming the task ("1", "2a", "2b", "3", "4",
             "5", "6a" or "6b").

     Raises:
         ValueError: if *task_num* does not name a known task.
     """
     # Map keys to task *classes*, not instances: only the selected task
     # is constructed, instead of eagerly building all eight objects
     # (and running their constructors' side effects) on every call.
     task_classes = {
         "1": Task1,
         "2a": Task2a,
         "2b": Task2b,
         "3": Task3,
         "4": Task4,
         "5": Task5Driver,
         "6a": Task6a,
         "6b": Task6b,
     }
     task_cls = task_classes.get(task_num)
     if task_cls is None:
         # The original .get(...).runner() died with an opaque
         # AttributeError on None; fail with an actionable error instead.
         raise ValueError("unknown task number: %r" % (task_num,))
     # Every task class provides a runner() entry point.
     task_cls().runner()
Пример #5
0

if __name__ == '__main__':
    # NOTE: Python 2 script (bare print statements below).
    # Load the list of URLs to crawl from the URLs database.
    urls = get_urls(URLS_DB)

    client = GearmanClient(['10.61.0.145'])  # Gearman job-server address
    tasks = Taskset()
    TASK_URLS_NUM = 100  # number of URLs bundled into one crawl task
    # distribute tasks: split the URL list into chunks of TASK_URLS_NUM
    i = 0
    while i < len(urls):
        sub_urls = urls[i:i + TASK_URLS_NUM]
        # one newline-separated workload string per task
        workload = '\n'.join(sub_urls)
        t = Task1('crawl',
                  workload,
                  str(i),
                  timeout=TASK1_TIMEOUT,
                  retry_count=1)
        tasks.add(t)
        print "add task:%s" % t.uniq
        i += TASK_URLS_NUM
        # test
        pass

    # 0.init database for return result from worker
    print "0.initialize database for results."
    # One integer column per category, plus the word key and a total column.
    tmps = ["%s int" % cate for cate in CATES]
    cates_str = ','.join(tmps)
    # NOTE(review): table/column names are interpolated straight into SQL;
    # this is safe only if RAW_WORDS_TB and CATES are trusted constants —
    # confirm they never carry external input.
    tb_sql = "create table %s (word text primary key,%s,total int);" % (
        RAW_WORDS_TB, cates_str)
    print tb_sql
Пример #6
0
    def runner(self):
        """Interactive entry point shared by all the tasks.

        Prompts for k, an algorithm ('SVD', 'PCA' or 'LDA') and one
        entity (user / image / location) with its id, builds the
        document-term matrices for all three entity types from Task1
        data, reduces each to k latent semantics with the chosen
        algorithm, and displays the top 5 similar entities per entity
        vector space.

        NOTE(review): k and algo_choice are read via input() and remain
        str here — confirm dim_reduce_SVD/dim_reduce_LDA accept a str k
        (or convert it upstream).
        """
        #k = input("Enter the value of k :")
        #start = time.time()
        k = input("Enter the value of k :")

        # user_id = input("Enter the user id: ")
        # image_id = input("Enter the image id: ")
        # location_id = input("Enter the location id: ")

        algo_choice = input("Enter the Algorithm: ")

        entity_index = int(
            input("Choose the entity id \t1) User \t2)Image \t3)Location.: "))

        user_id, image_id, location_id = None, None, None

        # Record which entity space the query lives in and read its id.
        if entity_index == 1:
            self.entity_type = constants.USER_TEXT
            user_id = input("Enter the user id: ")

        elif entity_index == 2:
            self.entity_type = constants.IMAGE_TEXT
            image_id = input("Enter the image id: ")

        elif entity_index == 3:
            self.entity_type = constants.LOCATION_TEXT
            location_id = input("Enter the location id: ")
        """
		Get the document term matrix for users,images and locations from task1
		"""
        # Build each entity's document-term matrix from Task1 data, then
        # convert to a numpy array and transpose.
        user_data = Task1()
        user_data.load_data_per_entity(constants.USER_TEXT)
        user_term_matrix = self.get_document_term_matrix(user_data)
        user_term_matrix = self.ut.convert_list_to_numpyarray(
            user_term_matrix).T

        image_data = Task1()
        image_data.load_data_per_entity(constants.IMAGE_TEXT)
        image_term_matrix = self.get_document_term_matrix(image_data)
        image_term_matrix = self.ut.convert_list_to_numpyarray(
            image_term_matrix).T

        location_data = Task1()
        location_data.load_data_per_entity(constants.LOCATION_TEXT)
        location_term_matrix = self.get_document_term_matrix(location_data)
        location_term_matrix = self.ut.convert_list_to_numpyarray(
            location_term_matrix).T

        # Resolve the supplied id to its row index in the matching
        # entity's data; unknown ids surface as a descriptive ValueError.
        if self.entity_type == constants.USER_TEXT:
            try:
                self.user_index = list(
                    user_data.data.master_dict.keys()).index(user_id)
            except ValueError:
                raise ValueError(constants.USER_ID_KEY_ERROR)
            pass
        elif self.entity_type == constants.IMAGE_TEXT:
            try:
                self.image_index = list(
                    image_data.data.master_dict.keys()).index(image_id)
            except ValueError:
                raise ValueError(constants.IMAGE_ID_KEY_ERROR)
            pass
        elif self.entity_type == constants.LOCATION_TEXT:
            try:
                # NOTE(review): a location_id missing from self.mapping
                # raises KeyError here, which this except clause does NOT
                # catch — confirm whether that is intended.
                input_location = self.mapping[location_id]
                self.location_index = list(
                    location_data.data.master_dict.keys()).index(
                        input_location)
            except ValueError:
                raise ValueError(constants.LOCATION_ID_KEY_ERROR)
            pass

        if algo_choice == 'SVD' or algo_choice == 'PCA':
            # PCA reuses the SVD path; the flag tells dim_reduce_SVD to
            # work on the covariance matrix instead.
            pca = False
            if algo_choice == 'PCA':
                pca = True
            """
			Decompose the original document term matrix into U,S and Vt using SVD
			For PCA we pass pca flag to indicate the passing of covariance matrix in the SVD method.
			"""
            user_u_matrix, user_S_matrix, user_vt_matrix = self.dim_reduce_SVD(
                user_term_matrix, k, pca)
            image_u_matrix, image_S_matrix, image_vt_matrix = self.dim_reduce_SVD(
                image_term_matrix, k, pca)
            location_u_matrix, location_S_matrix, location_vt_matrix = self.dim_reduce_SVD(
                location_term_matrix, k, pca)
            """
			Get the latent semantics for users, images and locations
			"""
            user_semantics_map, image_semantics_map,location_semantics_map = \
              self.get_all_latent_semantics_map(user_data,image_data,location_data,
               user_u_matrix,image_u_matrix,location_u_matrix)

            self.user_semantics_map = user_semantics_map
            self.image_semantics_map = image_semantics_map
            self.location_semantics_map = location_semantics_map
            """
			Get the similar cross entities given a entity id. eg userid -> similar users, images,
			and locations,  imageid -> similar images, locations and users.
			"""
            self.get_similar_entities(user_term_matrix, image_term_matrix,
                                      location_term_matrix, user_S_matrix,
                                      user_vt_matrix, image_S_matrix,
                                      image_vt_matrix, location_S_matrix,
                                      location_vt_matrix, user_id, image_id,
                                      location_id)

        elif algo_choice == 'LDA':
            """
			Decompose the original document term matrix into U,S and Vt using LDA
			"""
            user_u_matrix, user_S_matrix, user_vt_matrix = self.dim_reduce_LDA(
                user_term_matrix, k)
            image_u_matrix, image_S_matrix, image_vt_matrix = self.dim_reduce_LDA(
                image_term_matrix, k)
            location_u_matrix, location_S_matrix, location_vt_matrix = self.dim_reduce_LDA(
                location_term_matrix, k)

            user_semantics_map, image_semantics_map,location_semantics_map = \
              self.get_all_latent_semantics_map(user_data,image_data,location_data,
               user_u_matrix,image_u_matrix,location_u_matrix)

            self.user_semantics_map = user_semantics_map
            self.image_semantics_map = image_semantics_map
            self.location_semantics_map = location_semantics_map

            self.get_similar_entities(user_term_matrix, image_term_matrix,
                                      location_term_matrix, user_S_matrix,
                                      user_vt_matrix, image_S_matrix,
                                      image_vt_matrix, location_S_matrix,
                                      location_vt_matrix, user_id, image_id,
                                      location_id)

        #print("Seconds",time.time() - start)
Пример #7
0
 def get_exe_time(rdd):
     """Return the elapsed seconds taken by the Task1 top-10 query on *rdd*.

     Uses time.perf_counter() — a monotonic, high-resolution clock — instead
     of time.time(), whose wall-clock value can jump backwards or forwards
     on system clock adjustments and skew the measurement.
     """
     start = time.perf_counter()
     Task1.top_10_businesses_had_largest_numbers_of_reviews_and_number_of_reviews_they_had(rdd)
     return time.perf_counter() - start
 def __init__(self):
     """Cache the model list, the known locations, and a Task1 helper."""
     self.models = constants.MODELS
     extractor = DataExtractor()
     location_map = extractor.location_mapping()
     self.locations = list(location_map.values())
     self.task1 = Task1()
Пример #9
0
 def open_task1(self):
     """Open Task1 in a new top-level window attached to the master widget."""
     window = Toplevel(self.master)
     self.task1 = window
     self.app = Task1(window)
Пример #10
0
	def select_task(self, task_num):
		"""Instantiate and run the task identified by *task_num* (1-7).

		Raises:
			ValueError: if *task_num* is not a known task number.
		"""
		# Map numbers to task *classes*; only the chosen task is
		# constructed, instead of eagerly instantiating all seven
		# objects on every call.
		task_classes = {1: Task1, 2: Task2, 3: Task3, 4: Task4, 5: Task5, 6: Task6, 7: Task7}
		task_cls = task_classes.get(task_num)
		if task_cls is None:
			# The original .get(...).runner() died with an opaque
			# AttributeError on None for unknown numbers.
			raise ValueError("unknown task number: %r" % (task_num,))
		# Every task class exposes a runner() entry point.
		task_cls().runner()