def marked_sample(cls, result_id, label):
    """Store a label on one result and refresh the owning task's progress.

    Args:
        result_id: id of the result record to label.
        label: label value written to the result.

    Returns:
        The task as returned by ``_add_extra_info_with_task`` once the
        per-user marked-sample count of the current round has reached
        ``task.total_count``; otherwise ``None``.
    """
    marked = CSResultBusiness.read_by_id(result_id)
    marked.label = label
    marked.save()

    # Every labelling action is taken into account; the count is recomputed
    # from the stored results, so the same label is not counted twice.
    task = cls._add_marked_count(marked.task.id)

    # Check whether labelling is finished for the current round.
    round_results = CSResultBusiness.read_all_by_id(task, task.evaluation)
    samples_done, _need_times, _done_times = \
        cls._compute_sample_marked_count(round_results)
    if samples_done < task.total_count:
        return None

    # Notify the sponsor, with the 'admin' account as sender.
    sender = UserBusiness.get_by_user_ID('admin')
    MessageService.create_message(
        sender,
        'cs_task_done',
        [task.sponsor],
        user=task.sponsor,
        task=marked.task,
        project=marked.task.module,
    )

    # Everything is labelled: mark the task as finished (status 2).
    if task.marked_count == task.total_count:
        task.status = 2
        task.save()

    # NOTE(review): the original formatting was lost; this return is assumed
    # to belong to the "all samples marked" branch rather than to the
    # status check above - confirm.
    return cls._add_extra_info_with_task(task)
def _dispatch(cls, non_exactly, task_id, evaluation):
    """Assign every sample to randomly chosen users and invite those users.

    For each sample a labeler count is drawn from ``cls._choose_people`` and
    that many users (never the task sponsor) are picked at random. One empty
    result record is created per (sample, user) pair, then a single
    'cs_task_invited' message is created for all distinct invited users.

    Args:
        non_exactly: sequence of samples that need crowd-sourced labels.
        task_id: id of the crowd-sourcing task.
        evaluation: evaluation round the results belong to.

    Note:
        When fewer users are available than the drawn count, the sample is
        dispatched to every available user instead of raising ``ValueError``
        from ``random.sample``.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)

    # Never send a task to its own sponsor.
    candidates = [
        user for user in UserBusiness.get_all()
        if user.user_ID != task.sponsor.user_ID
    ]

    # Decide the whole dispatch first so nothing is written to the database
    # if the selection step fails.
    assignments = []
    for sample in non_exactly:
        wanted = random.sample(cls._choose_people, 1)[-1]
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request at the number of candidates.
        labelers = random.sample(candidates, min(wanted, len(candidates)))
        assignments.append((sample, labelers))

    # Insert the result records and collect each invited user id once,
    # keeping first-seen order.
    invited_users = []
    seen_ids = set()
    for sample, labelers in assignments:
        for user in labelers:
            if user.id not in seen_ids:
                seen_ids.add(user.id)
                invited_users.append(user.id)
            # NOTE(review): the add_result wrapper in this file passes
            # (user_id, task_id, evaluation, ...) to the same business
            # method - confirm which argument order is correct.
            CSResultBusiness.add_result(task_id, evaluation, user.user_ID,
                                        sample, '')

    # Invite the selected users, with the 'admin' account as sender.
    message = MessageService.create_message(
        UserBusiness.get_by_user_ID('admin'),
        'cs_task_invited',
        invited_users,
        project=task.dataset,
        task=task,
    )
    print('invited:', message.to_mongo())
def get_task_detail(cls, task_id):
    """Collect per-round CSV paths and marking statistics for a task.

    For every evaluation round from 1 up to the task's current round, the
    round's CSV file is (re)generated and the marking statistics of that
    same round are computed.

    Args:
        task_id: id of the crowd-sourcing task.

    Returns:
        A dict keyed by round number whose values hold ``file_name``,
        ``file_path``, ``samples``, ``need_marked_times`` and
        ``marked_times``; ``False`` when the task has no rounds.
    """
    task = CSTaskBusiness.read_task_by_id(task_id)
    current_round = task.evaluation

    former = {}
    try:
        for i in range(1, current_round + 1):
            # Read the results of round i. The previous code read round
            # i + 1 here, so the statistics did not match the CSV file
            # generated below for round i.
            results = CSResultBusiness.read_all_by_id(task, i)

            # _generate_csv_file_for_results reads the round from the task.
            task.evaluation = i
            file_path = cls._generate_csv_file_for_results(
                task, task.dataset, task.sponsor.user_ID)
            file_name = str(task.id) + '_' + str(task.evaluation) + '.csv'

            sample_marked_for_user, need_marked_times, marked_times = \
                cls._compute_sample_marked_count(results)
            former[i] = {
                'file_name': file_name,
                'file_path': file_path,
                'samples': sample_marked_for_user,
                'need_marked_times': need_marked_times,
                'marked_times': marked_times
            }
    finally:
        # Leave the in-memory task on its real round even if a round fails.
        task.evaluation = current_round

    return former if former else False
def fulfil_task(cls, task_id, user_ID):
    """Mark a task as done and copy its result CSV into the module folder.

    Args:
        task_id: id of the crowd-sourcing task.
        user_ID: user ID forwarded to the CSV generation, where it is used
            as part of the storage path.

    Returns:
        A tuple ``(csv_file, task, dest_path)``: the generated CSV path, the
        task as returned by ``_add_extra_info_with_task``, and the path of
        the copy placed under the task's module.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)
    dataset = ProjectBusiness.get_by_id(task.dataset.id)

    # Mark the task as complete (status 2).
    task.status = 2
    task.save()

    # Generate the CSV for the current round and copy it to
    # <module path>/<CS_PATH>/<task id>/.
    csv_file = cls._generate_csv_file_for_results(task, dataset, user_ID)
    dest_dir = os.path.join(task.module.path, CS_PATH, str(task.id))
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(dest_dir, exist_ok=True)
    dest_path = os.path.join(dest_dir, os.path.basename(csv_file))
    shutil.copyfile(csv_file, dest_path)

    # NOTE(review): other callers in this file pass the task object rather
    # than task.id to read_all_by_id - confirm both are accepted.
    results = CSResultBusiness.read_all_by_id(task.id, task.evaluation)
    sample_marked_for_user, need_marked_times, marked_times = \
        cls._compute_sample_marked_count(results)

    # Finished: third round or later, or not every sample has been marked.
    # NOTE(review): the original formatting was lost; the save is assumed
    # to happen only when the flag is set - confirm.
    if task.evaluation >= 3 or sample_marked_for_user < task.total_count:
        task.user_operation = True
        task.save()

    # Attach the extra display info.
    task = cls._add_extra_info_with_task(task)
    print('back')
    return csv_file, task, dest_path
def _read_tasks_for_status(cls, dataset_id):
    """Aggregate crowd-sourcing statistics over the tasks of a dataset.

    Tasks whose ``total_count`` is not positive are ignored.

    Args:
        dataset_id: id of the dataset whose tasks are inspected.

    Returns:
        A tuple ``(doing_count, done_count, total_count, ids, user_count)``:
        the number of tasks with status 0, the number with status 2, the sum
        of ``total_count``, the unique ``_id`` values of analysed results
        carrying more than one distinct label, and the number of distinct
        user IDs found in the results.
    """
    tasks = CSTaskBusiness.read_tasks_with_dataset(dataset_id, None)

    conflicting_ids = []
    labeler_ids = set()
    finished = 0
    in_progress = 0
    sample_total = 0

    for task in tasks:
        if task.total_count <= 0:
            continue

        round_results = CSResultBusiness.read_all_by_id(task, task.evaluation)
        analysed, _ = cls._analyse_labels(round_results)

        for user in round_results.distinct('user'):
            labeler_ids.add(user.user_ID)

        # Keep the entries on which the labels disagree.
        for entry in analysed:
            if len(np.unique(entry.get('labels'))) > 1:
                conflicting_ids.append(entry.get('_id'))

        sample_total += task.total_count
        if task.status == 2:
            finished += 1
        elif task.status == 0:
            in_progress += 1

    unique_ids = np.unique(conflicting_ids).tolist()
    return in_progress, finished, sample_total, unique_ids, len(labeler_ids)
def read_need_to_task(cls, user_ID, task_id, evaluation=1):
    """List the samples a user still has to label for a task.

    Only results whose label is the empty string are returned. For each of
    them ``url`` and ``filename`` are derived from the sample path and are
    also set as attributes on the result object.

    Args:
        user_ID: user ID of the labeler.
        task_id: id of the crowd-sourcing task.
        evaluation: evaluation round to read (defaults to 1).

    Returns:
        A tuple ``(part_data, classes)`` where ``part_data`` is a list of
        dicts with ``_id``, ``url`` and ``filename``, and ``classes`` is
        ``task.classes``.
    """
    results = CSResultBusiness.read_by_user_ID_and_task_ID(
        user_ID, task_id, evaluation)
    task = CSTaskBusiness.get_task_by_id(task_id)

    part_data = []
    # Iterate directly instead of indexing every element by position.
    for result in results:
        if result.label != '':
            continue

        paths = result.sample.split('/')
        # Keep the path from the first 'datasets' component onwards; use the
        # whole path when there is no such component.
        try:
            index = paths.index('datasets')
        except ValueError:
            index = 0

        # Directory part (without the file name), rooted at the module path.
        result.url = os.path.join(task.module.path, *paths[index:-1])
        result.filename = paths[-1]
        part_data.append({
            '_id': str(result.id),
            'url': result.url,
            'filename': result.filename
        })
    return part_data, task.classes
def _add_extra_info_with_task(cls, task):
    """Attach display-oriented attributes to a task object and return it.

    Sets ``invited_users``, the sponsor's ``user_ID`` / ``username`` /
    ``avatar_url``, ``accept_user_count``, the marking statistics of the
    current round, ``percentage`` and ``dataset_name``. Underscores in
    ``dataset_version`` and ``module_version`` are replaced by dots.

    Args:
        task: the crowd-sourcing task to decorate (modified in place).

    Returns:
        The same ``task`` object.
    """
    results = CSResultBusiness.read_all_by_id(task, task.evaluation)
    users = results.distinct('user')
    task.invited_users = [
        {
            "username": user.username,
            "user_ID": user.user_ID,
            "avatar_url": user.avatar_url
        }
        for user in users
    ]

    sponsor = task.sponsor
    task.user_ID = sponsor.user_ID
    task.username = sponsor.username
    task.avatar_url = sponsor.avatar_url

    # How many people: count the users collected above. The previous code
    # used len(task.user_ID), i.e. the length of the sponsor's ID string.
    # NOTE(review): assumes the count is meant to cover every user that has
    # results in the current round - confirm.
    task.accept_user_count = len(task.invited_users)

    # How many samples are marked.
    task.sample_marked_for_user, task.need_marked_times, task.marked_times = \
        cls._compute_sample_marked_count(results)
    # A zero total is replaced by 0.01 to avoid dividing by zero.
    total_count = 0.01 if task.total_count == 0 else task.total_count
    task.percentage = int(task.sample_marked_for_user / total_count * 100)

    # Transform the version format: underscores become dots.
    task.dataset_version = task.dataset_version.replace('_', '.')
    task.module_version = task.module_version.replace('_', '.')

    # Dataset name.
    task.dataset_name = task.dataset.display_name
    return task
def _add_marked_count(cls, task_id):
    """Recompute and persist the marked-sample count of a task.

    An analysed entry counts as marked when its labels collapse to exactly
    one distinct value and that value is not the empty string. The task is
    saved only when the recomputed count differs from the stored one.

    Args:
        task_id: id of the crowd-sourcing task.

    Returns:
        The task object, with ``marked_count`` up to date.
    """
    task = CSTaskBusiness.get_task_by_id(task_id)
    round_results = CSResultBusiness.read_all_by_id(task, task.evaluation)
    analysed, _ = cls._analyse_labels(round_results)

    agreed = 0
    for entry in analysed:
        distinct_labels = np.unique(entry.get('labels'))
        if len(distinct_labels) == 1 and distinct_labels[-1] != '':
            agreed += 1

    if task.marked_count != agreed:
        task.marked_count = agreed
        task.save()
    return task
def _generate_csv_file_for_results(cls, task, dataset, user_ID):
    """Write the winning label of every sample of the current round to CSV.

    The file is stored at ``<dataset.dataset_path>/crowdsourcing/<user_ID>/
    <module name>/<task.dataset_version>/<task id>_<round>.csv`` with the
    columns ``sample`` and ``label``.

    For each sample the non-empty label with the highest count wins (the
    first one encountered on a tie); a sample without any non-empty label
    gets the string 'None'. When there are no samples at all, a single
    placeholder row is written.

    Args:
        task: the crowd-sourcing task; its ``evaluation`` selects the round.
        dataset: object providing ``dataset_path``.
        user_ID: user ID used as a path component.

    Returns:
        The full path of the written CSV file.
    """
    # Build the target path.
    module = ProjectBusiness.get_by_id(task.module.id)
    save_path = os.path.join('crowdsourcing', user_ID, module.name,
                             task.dataset_version)
    csv_file_name = str(task.id) + '_' + str(task.evaluation) + '.csv'

    # Read the results of the current round.
    results = CSResultBusiness.read_all_by_id(task, task.evaluation)

    # Create the directory; exist_ok avoids the check-then-create race.
    store_path = os.path.join(dataset.dataset_path, save_path)
    os.makedirs(store_path, exist_ok=True)
    full_path = os.path.join(store_path, csv_file_name)

    _, transform_tasks = cls._analyse_labels(results)

    # Pick the most frequent non-empty label of each sample.
    rows = []
    for sample in transform_tasks:
        votes = {
            key: count for key, count in sample['label'].items()
            if key != ''
        }
        # max() returns the first maximal key, matching a strict ">" scan.
        label = max(votes, key=votes.get) if votes else 'None'
        rows.append({'sample': sample['sample'], 'label': label})

    if not rows:
        rows.append({'sample': 'no samples', 'label': 'no labels'})

    # Write without the index column.
    pd.DataFrame(rows).to_csv(full_path, index=False)
    return full_path
def add_result(cls, task_id, evaluation, user_id, sample, sample_label):
    """Create a result record by delegating to ``CSResultBusiness.add_result``.

    Args:
        task_id: id of the crowd-sourcing task.
        evaluation: evaluation round of the result.
        user_id: identifier of the labeler.
        sample: the sample the result refers to.
        sample_label: label value to store.

    Returns:
        Whatever ``CSResultBusiness.add_result`` returns.
    """
    # NOTE(review): user_id is passed first here, while _dispatch in this
    # file calls the same business method with (task_id, evaluation,
    # user_ID, ...) - confirm which argument order is correct.
    created = CSResultBusiness.add_result(
        user_id, task_id, evaluation, sample, sample_label)
    return created
def read_by_id(cls, result_id):
    """Return the result record with the given id.

    Thin wrapper around ``CSResultBusiness.read_by_id``.
    """
    return CSResultBusiness.read_by_id(result_id)
def read_by_user_and_sample(cls, user_ID, sample, task_id, evaluation):
    """Look up a result by user, sample, task and evaluation round.

    Delegates to ``CSResultBusiness.read_by_user_and_sample``, prints what
    was found and returns it.
    """
    found = CSResultBusiness.read_by_user_and_sample(
        user_ID, sample, task_id, evaluation)
    print('result', found)
    return found
def read_by_user_and_task(cls, user_ID, task_id, evaluation=1):
    """Return a user's results for a task and evaluation round.

    Thin wrapper around ``CSResultBusiness.read_by_user_ID_and_task_ID``;
    ``evaluation`` defaults to 1.
    """
    user_results = CSResultBusiness.read_by_user_ID_and_task_ID(
        user_ID, task_id, evaluation)
    return user_results
def read_all(cls, task, evaluation):
    """Return all results of a task for the given evaluation round.

    Thin wrapper around ``CSResultBusiness.read_all_by_id``.
    """
    all_results = CSResultBusiness.read_all_by_id(task, evaluation)
    return all_results