def __init__(self, chat_id):
    """Build the four tasks for *chat_id*, send the intro comments, then start grading-system selection."""
    self.chat_id = chat_id
    # One instance of each task type, all bound to the same chat.
    self.tasks = [task_type(chat_id) for task_type in (Task1, Task2, Task3, Task4)]
    # Greet with the first five canned comments concatenated into one message.
    bot.send_message(chat_id, ''.join(comments[:5]))
    self.choose_grading_system()
def get_100_first_tags(self):
    """Run the tagging pipeline for every JSON file under month/month_tokenized."""
    for filename in os.listdir("month/month_tokenized"):
        if not filename.endswith("json"):
            continue
        stem = filename[:-5]  # file name with the ".json" suffix stripped
        # NOTE(review): these two objects are created only for their
        # constructor side effects — the original rebound the same name
        # immediately; neither instance is used afterwards. Confirm the
        # constructors really do the work.
        Task1("month/month_tokenized/%s" % filename,
              "month/month_tokenized/month_%s" % stem)
        Task3("month/month_not_tokenized/%s" % filename,
              "month/month_not_tokenized/month_%s" % stem)
        tfidf = Task2Tfidf("month/month_tokenized/month_%s" % stem)
        tfidf.main()
        print("done %s" % filename)
def __solve(self, user_id, show_untagged: bool, ignore_private_accounts: bool, use_extended_group_info: bool, ignore_empty_friends: bool) -> dict:
    """Tag the user's friends via Task1, optionally dropping friends whose 'groups' list is empty.

    Returns a dict mapping friend -> profile data as produced by Task1.solve.
    """
    friends = self.__vkApiWrapper.get_friends(user_id)
    solver = Task1(self.__content_analyzers, self.__vkApiWrapper)
    result = solver.solve(friends, show_untagged, ignore_private_accounts,
                          use_extended_group_info)
    if not ignore_empty_friends:
        return dict(result)
    # Keep only entries that either lack a 'groups' key or have a non-empty one.
    return {key: profile for key, profile in result.items()
            if 'groups' not in profile or len(profile['groups']) != 0}
def select_task(self, task_num):
    """Instantiate and run the task identified by *task_num* ("1", "2a", ... "6b").

    Fixes two defects in the original: every task object was constructed
    eagerly just to pick one (running eight constructors and their side
    effects per call), and an unknown task number crashed with an opaque
    AttributeError on None.

    Raises:
        ValueError: if *task_num* is not a known task key.
    """
    # Plugin class names for each task here — mapped to the classes so only
    # the requested task is constructed.
    task_classes = {
        "1": Task1,
        "2a": Task2a,
        "2b": Task2b,
        "3": Task3,
        "4": Task4,
        "5": Task5Driver,
        "6a": Task6a,
        "6b": Task6b,
    }
    try:
        task_class = task_classes[task_num]
    except KeyError:
        raise ValueError("Unknown task number: %r" % task_num)
    # Have a runner method in all the task classes
    task_class().runner()
if __name__ == '__main__':
    # Load the URL list and connect to the Gearman job server.
    urls = get_urls(URLS_DB)
    client = GearmanClient(['10.61.0.145'])
    tasks = Taskset()
    TASK_URLS_NUM = 100  # number of URLs handed to each crawl task
    # Distribute the URLs across crawl tasks in fixed-size batches.
    i = 0
    while i < len(urls):
        sub_urls = urls[i:i + TASK_URLS_NUM]
        # Each task's workload is one newline-separated URL string;
        # the batch's start offset doubles as the task identifier.
        workload = '\n'.join(sub_urls)
        t = Task1('crawl', workload, str(i), timeout=TASK1_TIMEOUT,
                  retry_count=1)
        tasks.add(t)
        print "add task:%s" % t.uniq
        i += TASK_URLS_NUM
    # test pass
    # 0. Initialize the database table that will receive worker results:
    # one integer column per category, keyed by word, plus a total column.
    print "0.initialize database for results."
    tmps = ["%s int" % cate for cate in CATES]
    cates_str = ','.join(tmps)
    tb_sql = "create table %s (word text primary key,%s,total int);" % (
        RAW_WORDS_TB, cates_str)
    print tb_sql
def runner(self):
    """Interactively display the entities most similar to a chosen entity.

    Prompts for k, an algorithm (SVD / PCA / LDA), an entity type
    (user / image / location) and the matching entity id; builds
    document-term matrices for all three entity types from Task1 data,
    reduces each to k latent semantics with the chosen algorithm, and
    hands the decompositions to get_similar_entities for cross-entity
    similarity output.

    Raises:
        ValueError: if the entered user/image/location id is not present
            in the loaded data.
    """
    #k = input("Enter the value of k :")
    #start = time.time()
    k = input("Enter the value of k :")
    # user_id = input("Enter the user id: ")
    # image_id = input("Enter the image id: ")
    # location_id = input("Enter the location id: ")
    algo_choice = input("Enter the Algorithm: ")
    entity_index = int(
        input("Choose the entity id \t1) User \t2)Image \t3)Location.: "))
    # Only the id matching the chosen entity type is filled in below;
    # the other two stay None and are passed through as such.
    user_id, image_id, location_id = None, None, None
    if entity_index == 1:
        self.entity_type = constants.USER_TEXT
        user_id = input("Enter the user id: ")
    elif entity_index == 2:
        self.entity_type = constants.IMAGE_TEXT
        image_id = input("Enter the image id: ")
    elif entity_index == 3:
        self.entity_type = constants.LOCATION_TEXT
        location_id = input("Enter the location id: ")
    """
    Get the document term matrix for users,images and locations from task1
    """
    user_data = Task1()
    user_data.load_data_per_entity(constants.USER_TEXT)
    user_term_matrix = self.get_document_term_matrix(user_data)
    user_term_matrix = self.ut.convert_list_to_numpyarray(
        user_term_matrix).T
    image_data = Task1()
    image_data.load_data_per_entity(constants.IMAGE_TEXT)
    image_term_matrix = self.get_document_term_matrix(image_data)
    image_term_matrix = self.ut.convert_list_to_numpyarray(
        image_term_matrix).T
    location_data = Task1()
    location_data.load_data_per_entity(constants.LOCATION_TEXT)
    location_term_matrix = self.get_document_term_matrix(location_data)
    location_term_matrix = self.ut.convert_list_to_numpyarray(
        location_term_matrix).T
    # Resolve the entered id to its row index in the matching data's
    # master_dict key order; an unknown id surfaces as a domain ValueError.
    if self.entity_type == constants.USER_TEXT:
        try:
            self.user_index = list(
                user_data.data.master_dict.keys()).index(user_id)
        except ValueError:
            raise ValueError(constants.USER_ID_KEY_ERROR)
        pass
    elif self.entity_type == constants.IMAGE_TEXT:
        try:
            self.image_index = list(
                image_data.data.master_dict.keys()).index(image_id)
        except ValueError:
            raise ValueError(constants.IMAGE_ID_KEY_ERROR)
        pass
    elif self.entity_type == constants.LOCATION_TEXT:
        try:
            # Location ids go through self.mapping first — presumably
            # id -> location name (TODO confirm). Note a missing mapping
            # key raises KeyError, which is NOT converted below.
            input_location = self.mapping[location_id]
            self.location_index = list(
                location_data.data.master_dict.keys()).index(
                input_location)
        except ValueError:
            raise ValueError(constants.LOCATION_ID_KEY_ERROR)
        pass
    if algo_choice == 'SVD' or algo_choice == 'PCA':
        # PCA reuses the SVD path with a flag.
        pca = False
        if algo_choice == 'PCA':
            pca = True
        """
        Decompose the original document term matrix into U,S and Vt using SVD
        For PCA we pass pca flag to indicate the passing of covariance matrix
        in the SVD method.
        """
        user_u_matrix, user_S_matrix, user_vt_matrix = self.dim_reduce_SVD(
            user_term_matrix, k, pca)
        image_u_matrix, image_S_matrix, image_vt_matrix = self.dim_reduce_SVD(
            image_term_matrix, k, pca)
        location_u_matrix, location_S_matrix, location_vt_matrix = self.dim_reduce_SVD(
            location_term_matrix, k, pca)
        """
        Get the latent semantics for users, images and locations
        """
        user_semantics_map, image_semantics_map,location_semantics_map = \
            self.get_all_latent_semantics_map(user_data,image_data,location_data,
                                              user_u_matrix,image_u_matrix,location_u_matrix)
        self.user_semantics_map = user_semantics_map
        self.image_semantics_map = image_semantics_map
        self.location_semantics_map = location_semantics_map
        """
        Get the similar cross entities given a entity id.
        eg userid -> similar users, images, and locations,
        imageid -> similar images, locations and users.
        """
        self.get_similar_entities(user_term_matrix, image_term_matrix,
                                  location_term_matrix, user_S_matrix,
                                  user_vt_matrix, image_S_matrix,
                                  image_vt_matrix, location_S_matrix,
                                  location_vt_matrix, user_id, image_id,
                                  location_id)
    elif algo_choice == 'LDA':
        """
        Decompose the original document term matrix into U,S and Vt using LDA
        """
        user_u_matrix, user_S_matrix, user_vt_matrix = self.dim_reduce_LDA(
            user_term_matrix, k)
        image_u_matrix, image_S_matrix, image_vt_matrix = self.dim_reduce_LDA(
            image_term_matrix, k)
        location_u_matrix, location_S_matrix, location_vt_matrix = self.dim_reduce_LDA(
            location_term_matrix, k)
        user_semantics_map, image_semantics_map,location_semantics_map = \
            self.get_all_latent_semantics_map(user_data,image_data,location_data,
                                              user_u_matrix,image_u_matrix,location_u_matrix)
        self.user_semantics_map = user_semantics_map
        self.image_semantics_map = image_semantics_map
        self.location_semantics_map = location_semantics_map
        self.get_similar_entities(user_term_matrix, image_term_matrix,
                                  location_term_matrix, user_S_matrix,
                                  user_vt_matrix, image_S_matrix,
                                  image_vt_matrix, location_S_matrix,
                                  location_vt_matrix, user_id, image_id,
                                  location_id)
    #print("Seconds",time.time() - start)
def get_exe_time(rdd):
    """Return the wall-clock seconds Task1's top-10-businesses query spends on *rdd*."""
    started = time.time()
    Task1.top_10_businesses_had_largest_numbers_of_reviews_and_number_of_reviews_they_had(rdd)
    elapsed = time.time() - started
    return elapsed
def __init__(self):
    """Cache the model list, a Task1 helper, and the known location names."""
    self.models = constants.MODELS
    self.task1 = Task1()
    # Location names come from the id -> name mapping built by DataExtractor.
    location_map = DataExtractor().location_mapping()
    self.locations = list(location_map.values())
def open_task1(self):
    """Open the Task1 UI in a new Toplevel window over the master widget."""
    window = Toplevel(self.master)
    self.task1 = window
    self.app = Task1(window)
def select_task(self, task_num):
    """Instantiate and run the task identified by *task_num* (1-7).

    Fixes two defects in the original: all seven task objects were
    constructed eagerly just to pick one (running every constructor and
    its side effects per call), and an unknown task number crashed with
    an opaque AttributeError on None.

    Raises:
        ValueError: if *task_num* is not a known task key.
    """
    # Plugin class names for each task here — mapped to the classes so only
    # the requested task is constructed.
    task_classes = {
        1: Task1,
        2: Task2,
        3: Task3,
        4: Task4,
        5: Task5,
        6: Task6,
        7: Task7,
    }
    try:
        task_class = task_classes[task_num]
    except KeyError:
        raise ValueError("Unknown task number: %r" % task_num)
    # Have a runner method in all the task classes
    task_class().runner()