def segmentation(self, images, size, file_name, collection):
    """Build and upload the label JSON for a detection/segmentation image.

    Parses an XML annotation made of ``<object>``/``<bndbox>`` elements plus
    a ``<segmented>`` element, keeps every object whose name matches a
    ``Label`` of ``collection``, uploads the resulting JSON document and
    stores its id on ``images`` with ``status = 3``.

    Args:
        images: Image row being labelled; ``create_time``, ``site`` and
            ``id`` are written into the JSON header.
        size: Mapping providing ``'height'`` and ``'width'``.
        file_name: The XML annotation *content* (it is handed to
            ``parseString``, so despite the name it is not a path).
        collection: Object whose ``id`` scopes the label lookup.

    Raises:
        IndexError: If ``<segmented>`` or an object's ``<name>``/``<bndbox>``
            children are missing.
        ValueError: If ``xmax``/``xmin``/``ymax``/``ymin`` are not integers.
    """
    root = xml.dom.minidom.parseString(file_name).documentElement
    # The single <segmented> value is reused verbatim as the "segmentation"
    # payload of every annotation.
    segmented_flag = root.getElementsByTagName('segmented')[0].firstChild.data

    def child_text(node, tag):
        """Return the text of the first ``tag`` element below ``node``."""
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    entries = []
    for obj in root.getElementsByTagName('object'):
        name = child_text(obj, 'name')
        bndbox = obj.getElementsByTagName('bndbox')[0]
        xmin = child_text(bndbox, 'xmin')
        xmax = child_text(bndbox, 'xmax')
        ymin = child_text(bndbox, 'ymin')
        ymax = child_text(bndbox, 'ymax')

        label = Label.query.filter_by(collection_id=collection.id,
                                      name=name).first()
        if not label:
            # Objects without a matching label in this collection are skipped.
            continue

        # bbox is stored as [x, y, width, height]. The name goes through
        # json.dumps so quotes/backslashes cannot break the document.
        entries.append(
            '{"bbox": [%s, %s, %s, %s], "category_id": %s,'
            '"category_name": %s,"segmentation": [[%s]]}' % (
                xmin, ymin,
                int(xmax) - int(xmin), int(ymax) - int(ymin),
                label.label_id, json.dumps(name), segmented_flag))

    # Only the header template is %-formatted: annotation text may itself
    # contain '%' (e.g. in a label name) and must not be treated as a
    # format string.
    header = self.join_images % (images.create_time, images.site[10:],
                                 size.get('height'), size.get('width'),
                                 images.id)
    up_file = (header + '"classification": [],'
               + '"annotation": [' + ','.join(entries) + ']}')

    file_json = fun.upload(up_file, file_ext_name='json')
    images.status = 3
    images.label_path = file_json.get('file_id')
    db.session.add(images)
    db.session.commit()
def default(self, images, size):
    """Upload an empty label document for an image and mark it unlabelled.

    The document is ``self.join_images`` (filled with the image's
    ``create_time``, ``site[10:]``, height, width and ``id``) followed by
    empty ``classification`` and ``annotation`` lists. The upload's
    ``file_id`` becomes ``images.label_path`` and ``images.status`` is set
    to 0 before the row is committed.
    """
    empty_tail = '"classification": [],' + '"annotation": []' + '}'
    template = self.join_images + empty_tail
    header_values = (
        images.create_time,
        images.site[10:],
        size.get('height'),
        size.get('width'),
        images.id,
    )
    payload = template % header_values

    uploaded = fun.upload(payload, file_ext_name='json')
    images.status = 0
    images.label_path = uploaded.get('file_id')

    session = db.session
    session.add(images)
    session.commit()
def spider(spider_dict):
    """Run a crawl task and register every downloaded image.

    The crawl parameters are forwarded to ``spider_task``; each returned
    image URL is downloaded and uploaded to file storage, then stored as an
    ``Image`` row of the target collection with an empty label document
    (``clf.default``). Finally duplicates in the collection are cleaned with
    ``same_images_clean``.

    Args:
        spider_dict: JSON-encoded string of the crawl parameters (it is
            passed to ``json.loads``); must contain ``'collection_id'``.

    Returns:
        list: The ``fun.upload`` results, one per downloaded image.
    """
    from DataSet.rabbitMQ.task_queue_client import spider_task

    with app.app_context():
        # Decode once, before any work: the collection id is needed even
        # when the crawl returns no image at all.
        collection_id = json.loads(spider_dict)['collection_id']
        current_app.logger.debug('spider params: %s', spider_dict)

        image_urls = json.loads(spider_task(spider_dict))
        current_app.logger.debug('spider results: %s', image_urls)

        site_list = []
        for image_url in image_urls:
            response = urllib2.urlopen(image_url)
            try:
                content = response.read()
            finally:
                response.close()
            site_list.append(fun.upload(content, file_ext_name='jpg'))

        # Persist one Image row per uploaded file.
        for uploaded in site_list:
            image = Image()
            image.name = uploaded.get('filename')
            image.site = uploaded.get('file_id')
            image.collection_id = collection_id
            db.session.add(image)
            db.session.commit()

            # Use the row just committed rather than re-querying by name,
            # which could return a different image sharing that name.
            image_url_path = (current_app.config['NGINX_SITE']
                              + fun.getInfo(image.site)['group_file_id'])
            response = urllib2.urlopen(image_url_path)
            try:
                size = image_size(response.read())
            finally:
                response.close()
            clf.default(image, size)

        same_images_clean(collection_id)
        return site_list
def classification(self, images, size, category_id, category_name):
    """Upload the label JSON for a classification / face-recognition image.

    The document is ``self.join_images`` (filled with the image's
    ``create_time``, ``site[10:]``, height, width and ``id``) followed by a
    single classification entry and an empty ``annotation`` list. The
    upload's ``file_id`` becomes ``images.label_path`` and ``images.status``
    is set to 3 before the row is committed.

    Args:
        images: Image row being labelled.
        size: Mapping providing ``'height'`` and ``'width'``.
        category_id: Label id written as ``category_id``.
        category_name: Label name written as ``category_name``.
    """
    # The name is rendered as text (as before) but JSON-encoded, so quotes
    # or backslashes in a label name cannot corrupt the document.
    encoded_name = json.dumps('%s' % (category_name,))

    tail = ('"classification": [{"category_id": %s,"category_name": %s}],'
            '"annotation": []}')
    up_file = (self.join_images + tail) % (
        images.create_time, images.site[10:],
        size.get('height'), size.get('width'), images.id,
        category_id, encoded_name)

    file_json = fun.upload(up_file, file_ext_name='json')
    images.status = 3
    images.label_path = file_json.get('file_id')
    db.session.add(images)
    db.session.commit()
def preprocessing(collection_id, label_info, score_threshold=0.3):
    """Pre-annotate the pending images of a collection with the AI models.

    Every image of the collection with ``status == 1`` is sent (as base64
    JPEG) to ``preprocessing_task``; recognised labels are merged into the
    image's label JSON, which is re-uploaded when it changed. Each processed
    image ends with ``status = 2``.

    Args:
        collection_id: Collection whose images are processed.
        label_info: Mapping whose keys look like ``"<model>_<pre_model>"``
            and whose values map a model-side label name to the collection's
            label name. The model of the last key wins.
        score_threshold: Detections (model 2) are kept only when their score
            is strictly greater than this value.
    """
    from DataSet.rabbitMQ.task_queue_client import preprocessing_task

    with app.app_context():
        model = -1
        pre_model = []
        # model-side label name -> collection label name; setdefault keeps
        # the first mapping seen for a duplicated model-side name.
        name_by_model_label = {}
        for key in label_info:
            parts = key.split('_')
            model = int(parts[0])
            pre_model.append(int(parts[1]))
            for model_label in label_info[key]:
                name_by_model_label.setdefault(model_label,
                                               label_info[key][model_label])

        try:
            images = Image.query.filter_by(collection_id=collection_id,
                                           status=1).all()
        except Exception as e:
            current_app.logger.error(e)
            return
        if not images:
            current_app.logger.info('preprocessing: no images for %s',
                                    label_info)
            return

        site_root = current_app.config['NGINX_SITE']
        for image in images:
            raw_image = _preprocessing_fetch(
                site_root + fun.getInfo(image.site)['group_file_id'])
            b64 = _preprocessing_jpeg_b64(raw_image)

            # Request payload: [image, model, pre-model ids].
            json_data = json.loads(
                preprocessing_task([b64, model, pre_model]))

            label_json_data = json.loads(_preprocessing_fetch(
                site_root + fun.getInfo(image.label_path)['group_file_id']))

            if model == 1:
                changed = _preprocessing_classify(
                    collection_id, json_data, name_by_model_label,
                    label_json_data)
            elif model == 2:
                changed = _preprocessing_detect(
                    collection_id, json_data, name_by_model_label,
                    label_json_data, score_threshold)
            else:
                changed = False

            if changed:
                # Store the new document, then drop the one it replaces.
                new_label_path = fun.upload(json.dumps(label_json_data),
                                            file_ext_name='json')
                fun.remove(image.label_path)
                image.label_path = new_label_path['filename']
            image.status = 2
            db.session.commit()


def _preprocessing_fetch(url):
    """Return the body served at ``url``, closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _preprocessing_jpeg_b64(raw_image):
    """Return ``raw_image`` re-encoded as JPEG and base64-encoded."""
    img = image_pil.open(cStringIO.StringIO(raw_image))
    if img.mode in ('RGBA', 'LA', 'P'):
        # These modes are pasted onto an RGB canvas before JPEG encoding.
        background = image_pil.new('RGB', img.size)
        background.paste(img)
        img = background
    buf = BytesIO()
    img.save(buf, format="JPEG")
    return base64.b64encode(buf.getvalue())


def _preprocessing_classify(collection_id, json_data, name_by_model_label,
                            label_json_data):
    """Apply model-1 results to ``label_json_data``; return True if changed.

    An image holds a single classification, so when several results match,
    the last one wins and only its label count is incremented.
    """
    results = []
    for key in json_data:
        results.extend(json_data[key]['labels'])

    chosen = None
    chosen_name = None
    for result in results:
        content = result['content']
        if content not in name_by_model_label:
            current_app.logger.debug('other label: %s', content)
            continue
        label_name = name_by_model_label[content]
        label = Label.query.filter_by(collection_id=collection_id,
                                      name=label_name).first()
        if label is None:
            current_app.logger.warning('label %s not found in collection %s',
                                       label_name, collection_id)
            continue
        chosen, chosen_name = label, label_name

    if chosen is None:
        return False
    # 'classification' is a list of entries everywhere else in this module.
    label_json_data['classification'] = [{
        'category_id': chosen.label_id,
        'category_name': chosen_name,
    }]
    chosen.count += 1
    return True


def _preprocessing_detect(collection_id, json_data, name_by_model_label,
                          label_json_data, score_threshold):
    """Append model-2 detections to ``label_json_data``; True if any added."""
    detections = []
    for key in json_data:
        detections.extend(json_data[key]['detection'])

    changed = False
    for detection in detections:
        pmid = detection['pre_model']
        if not (float(detection['score']) > score_threshold):
            continue
        ai_label_name = AI_label_list[pmid][
            detection['label_id']].decode('utf-8')
        if ai_label_name not in name_by_model_label:
            continue
        label_name = name_by_model_label[ai_label_name]
        label = Label.query.filter_by(collection_id=collection_id,
                                      name=label_name).first()
        if label is None:
            current_app.logger.warning('label %s not found in collection %s',
                                       label_name, collection_id)
            continue

        # Convert corner coordinates to [x, y, width, height] strings.
        xmin = float(detection['xmin'])
        ymin = float(detection['ymin'])
        annotation = {
            'bbox': [
                '%.2f' % xmin,
                '%.2f' % ymin,
                '%.2f' % (float(detection['xmax']) - xmin),
                '%.2f' % (float(detection['ymax']) - ymin),
            ],
            'category_id': label.label_id,
            'category_name': label.name,
            'segmentation': [],
        }
        if 'feaData' in detection:
            annotation['feaData'] = detection['feaData']
        label_json_data['annotation'].append(annotation)
        label.count += 1
        changed = True
    return changed
def save_label():
    """Save a classification and/or detection annotations for one image.

    Reads the JSON request body (``collection_id``, ``image``, optional
    ``classification`` and ``annotation``), validates every entry, then
    updates the image's label JSON in a single download/upload cycle and
    increments the count of each label used.

    The whole request is validated before anything is written, so a bad
    entry no longer leaves earlier entries half-saved.

    Returns:
        A ``jsonify`` response with ``err_no``/``err_desc``.
    """
    data = request.get_json()
    if not data:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')

    collection_id = data.get('collection_id')
    if Collection.query.filter_by(id=collection_id).first() is None:
        return jsonify(err_no=RET.NODATA, err_desc='没有该数据集')

    # --- Phase 1: validate the request without touching any state. ---
    classification = None
    classification_label = None
    state = data.get('classification')
    if state:
        label_id = state['category_id']
        category_name = state['category_name']
        if category_name is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
        try:
            classification_label = Label.query.filter_by(
                collection_id=collection_id, label_id=label_id).first()
        except Exception as e:
            current_app.logger.error(e)
            return jsonify(err_no=RET.DBERR, err_desc='查询失败')
        if not classification_label:
            return jsonify(err_no=RET.NODATA, err_desc='没有该标签')
        classification = {
            'category_id': label_id,
            'category_name': category_name,
        }

    pending = []  # (label, annotation) pairs to append
    for label_dict in data.get('annotation') or []:
        category_id = label_dict['category_id']
        category_name = label_dict['category_name']
        bbox = label_dict['bbox']
        segmentation = label_dict['segmentation']
        if not category_name or bbox is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
        try:
            label = Label.query.filter_by(collection_id=collection_id,
                                          label_id=category_id).first()
        except Exception as e:
            current_app.logger.error(e)
            return jsonify(err_no=RET.DBERR, err_desc='查询失败')
        if not label:
            return jsonify(err_no=RET.NODATA, err_desc='没有该标签')

        # Every bbox component must be a finite number.
        try:
            numbers = [float(num) for num in bbox]
        except (TypeError, ValueError) as e:
            current_app.logger.error(e)
            return jsonify(err_no=RET.DBERR, err_desc='参数错误')
        if any(math.isnan(num) or math.isinf(num) for num in numbers):
            current_app.logger.error(bbox)
            return jsonify(err_no=RET.DBERR, err_desc='参数错误')

        pending.append((label, {
            'bbox': bbox,
            'category_id': category_id,
            'category_name': category_name,
            'segmentation': segmentation,
        }))

    if classification is None and not pending:
        return jsonify(err_no=RET.OK, err_desc='标注成功')

    # --- Phase 2: locate the image (site is the URL path after the host). ---
    image_site = data.get('image')
    try:
        image = Image.query.filter_by(
            site=str(image_site).split('/', 4)[4]).first()
    except Exception as e:
        current_app.logger.error('%s: %s', image_site, e)
        return jsonify(err_no=RET.DBERR, err_desc='查询失败')
    if image is None:
        return jsonify(err_no=RET.NODATA, err_desc='没有该图片')
    if classification is not None and image.status == 3:
        return jsonify(err_no=RET.NODATA, err_desc='请勿重复标记分类图片')

    # --- Phase 3: one read-modify-write of the label document. ---
    label_url_path = (current_app.config['NGINX_SITE']
                      + fun.getInfo(image.label_path)['group_file_id'])
    response = urllib2.urlopen(label_url_path)
    try:
        label_json = json.loads(response.read())
    finally:
        response.close()

    if classification is not None:
        label_json['classification'] = [classification]
        classification_label.count += 1
    for label, annotation in pending:
        label_json['annotation'].append(annotation)
        label.count += 1

    # Upload the new document, drop the old one, then point the row at it.
    # The image status is deliberately left unchanged here.
    new_label_path = fun.upload(json.dumps(label_json), file_ext_name='json')
    fun.remove(image.label_path)
    image.label_path = new_label_path['filename']
    db.session.commit()

    return jsonify(err_no=RET.OK, err_desc='标注成功')
def delete_label():
    """Delete a classification and/or detection annotations from one image.

    Reads the JSON request body (``collection_id``, ``image``, optional
    ``classification`` and ``annotation``). The classification entry is
    removed from the image's label JSON and, for each requested annotation,
    every stored annotation whose bbox equals ``old_bbox`` is removed; the
    counts of the affected labels are decremented. The document is
    downloaded and re-uploaded once per request.

    Returns:
        A ``jsonify`` response with ``err_no``/``err_desc``.
    """
    data = request.get_json()
    if not data:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
    collection_id = data.get('collection_id')
    state = data.get('classification')

    # --- Phase 1: validate the request without touching any state. ---
    if state:
        try:
            label = Label.query.filter_by(
                collection_id=collection_id,
                label_id=state['category_id']).first()
        except Exception as e:
            current_app.logger.error(e)
            return jsonify(err_no=RET.DBERR, err_desc='查询失败')
        if not label:
            return jsonify(err_no=RET.NODATA, err_desc='没有该标签')

    removals = []  # (label, bbox-as-floats) pairs
    # A request without 'annotation' simply has nothing to remove here.
    for label_dict in data.get('annotation') or []:
        old_bbox = _delete_label_bbox_floats(label_dict['old_bbox'])
        if old_bbox is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='参数错误')
        try:
            label = Label.query.filter_by(
                collection_id=collection_id,
                label_id=label_dict['category_id']).first()
        except Exception as e:
            current_app.logger.error(e)
            return jsonify(err_no=RET.DBERR, err_desc='查询失败')
        if not label:
            return jsonify(err_no=RET.NODATA, err_desc='没有该标签')
        removals.append((label, old_bbox))

    if not state and not removals:
        return jsonify(err_no=RET.OK, err_desc='删除成功')

    # --- Phase 2: locate the image and load its label document. ---
    image_site = data.get('image')
    try:
        image = Image.query.filter_by(
            site=str(image_site).split('/', 4)[4]).first()
    except Exception as e:
        current_app.logger.error('%s: %s', image_site, e)
        return jsonify(err_no=RET.DBERR, err_desc='查询失败')
    if image is None:
        return jsonify(err_no=RET.NODATA, err_desc='没有该图片')

    label_url_path = (current_app.config['NGINX_SITE']
                      + fun.getInfo(image.label_path)['group_file_id'])
    response = urllib2.urlopen(label_url_path)
    try:
        label_json = json.loads(response.read())
    finally:
        response.close()

    # --- Phase 3: apply the deletions. ---
    if state:
        classifications = label_json.get('classification')
        if not classifications:
            return jsonify(err_no=RET.NODATA, err_desc='没有该标签')
        # The count to decrement belongs to the label stored in the
        # document, which may differ from the one named in the request.
        old_label = Label.query.filter_by(
            collection_id=collection_id,
            name=classifications[0]['category_name']).first()
        if old_label is not None:
            old_label.count -= 1
        del classifications[0]

    for label, old_bbox in removals:
        # Build a new list instead of removing while iterating, which would
        # skip the element following each removed one.
        kept = []
        for stored in label_json['annotation']:
            if _delete_label_bbox_floats(stored['bbox']) == old_bbox:
                label.count -= 1
            else:
                kept.append(stored)
        label_json['annotation'] = kept

    # Upload the new document, drop the old one, then point the row at it.
    new_label_path = fun.upload(json.dumps(label_json), file_ext_name='json')
    fun.remove(image.label_path)
    image.label_path = new_label_path['filename']
    db.session.commit()

    return jsonify(err_no=RET.OK, err_desc='删除成功')


def _delete_label_bbox_floats(bbox):
    """Return ``bbox`` as a list of floats, or None when it is not numeric.

    Accepts either a comma-separated string or a sequence of numbers /
    numeric strings.
    """
    parts = bbox.split(',') if hasattr(bbox, 'split') else bbox
    try:
        return [float(part) for part in parts]
    except (TypeError, ValueError):
        return None
def change_label():
    """Replace the classification and/or detection annotations of one image.

    Reads the JSON request body (``collection_id``, ``image``, optional
    ``classification`` and ``annotation``). The stored classification is
    swapped for the requested one; for each requested annotation the stored
    annotations whose bbox equals ``old_bbox`` are removed and the new
    annotation is appended. Label counts are decremented for what was
    removed and incremented for what was added. The label document is
    downloaded and re-uploaded once per request.

    Returns:
        A ``jsonify`` response with ``err_no``/``err_desc``.
    """
    data = request.get_json()
    if not data:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
    collection_id = data.get('collection_id')

    # --- Phase 1: validate the request without touching any state. ---
    classification = None
    classification_label = None
    state = data.get('classification')
    if state:
        label_id = state['category_id']
        category_name = state['category_name']
        if category_name is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
        try:
            classification_label = Label.query.filter_by(
                collection_id=collection_id, label_id=label_id).first()
        except Exception as e:
            current_app.logger.error(e)
            return jsonify(err_no=RET.DBERR, err_desc='查询失败')
        if not classification_label:
            return jsonify(err_no=RET.NODATA, err_desc='没有该标签')
        classification = {
            'category_id': label_id,
            'category_name': category_name,
        }

    changes = []  # (label, old bbox as floats, new annotation)
    # A request without 'annotation' simply has nothing to change here.
    for label_dict in data.get('annotation') or []:
        category_id = label_dict['category_id']
        category_name = label_dict['category_name']
        bbox = label_dict['bbox']
        old_bbox = label_dict['old_bbox']
        segmentation = label_dict['segmentation']
        # An empty segmentation list is valid (box-only annotations), so
        # only its absence is rejected.
        if (not category_name or not bbox or segmentation is None
                or old_bbox is None):
            return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
        try:
            label = Label.query.filter_by(collection_id=collection_id,
                                          label_id=category_id).first()
        except Exception as e:
            current_app.logger.error(e)
            return jsonify(err_no=RET.DBERR, err_desc='查询失败')
        if not label:
            return jsonify(err_no=RET.NODATA, err_desc='没有该标签')

        # Every bbox component must be a finite number; decimals and plain
        # numbers are accepted.
        numbers = _change_label_bbox_floats(bbox)
        if numbers is None or any(
                math.isnan(num) or math.isinf(num) for num in numbers):
            current_app.logger.error(str(bbox))
            return jsonify(err_no=RET.DBERR, err_desc='参数错误')
        old_numbers = _change_label_bbox_floats(old_bbox)
        if old_numbers is None:
            current_app.logger.error(str(old_bbox))
            return jsonify(err_no=RET.DBERR, err_desc='参数错误')

        changes.append((label, old_numbers, {
            'bbox': bbox,
            'category_id': category_id,
            'category_name': category_name,
            'segmentation': segmentation,
        }))

    if classification is None and not changes:
        return jsonify(err_no=RET.OK, err_desc='修改成功')

    # --- Phase 2: locate the image and load its label document. ---
    image_site = data.get('image')
    try:
        image = Image.query.filter_by(
            site=str(image_site).split('/', 4)[4]).first()
    except Exception as e:
        current_app.logger.error('%s: %s', image_site, e)
        return jsonify(err_no=RET.DBERR, err_desc='查询失败')
    if image is None:
        return jsonify(err_no=RET.NODATA, err_desc='没有该图片')

    label_url_path = (current_app.config['NGINX_SITE']
                      + fun.getInfo(image.label_path)['group_file_id'])
    response = urllib2.urlopen(label_url_path)
    try:
        label_json = json.loads(response.read())
    finally:
        response.close()

    # --- Phase 3: apply the changes. ---
    if classification is not None:
        classifications = label_json.get('classification')
        if not classifications:
            return jsonify(err_no=RET.NODATA, err_desc='没有该标签')
        old_label = Label.query.filter_by(
            collection_id=collection_id,
            name=classifications[0]['category_name']).first()
        if old_label is not None:
            old_label.count -= 1
        del classifications[0]
        classifications.append(classification)
        classification_label.count += 1

    for label, old_numbers, annotation in changes:
        # Build a new list instead of removing while iterating, which would
        # skip the element following each removed one.
        kept = []
        for stored in label_json['annotation']:
            if _change_label_bbox_floats(stored['bbox']) != old_numbers:
                kept.append(stored)
                continue
            # Decrement the label of the annotation being replaced, taken
            # from the stored entry rather than from the request.
            old_label = Label.query.filter_by(
                collection_id=collection_id,
                name=stored.get('category_name')).first()
            if old_label is not None:
                old_label.count -= 1
        kept.append(annotation)
        label_json['annotation'] = kept
        label.count += 1

    # Upload the new document, drop the old one, then point the row at it.
    new_label_path = fun.upload(json.dumps(label_json), file_ext_name='json')
    fun.remove(image.label_path)
    image.label_path = new_label_path['filename']
    db.session.commit()

    return jsonify(err_no=RET.OK, err_desc='修改成功')


def _change_label_bbox_floats(bbox):
    """Return ``bbox`` as a list of floats, or None when it is not numeric.

    Accepts either a comma-separated string or a sequence of numbers /
    numeric strings.
    """
    parts = bbox.split(',') if hasattr(bbox, 'split') else bbox
    try:
        return [float(part) for part in parts]
    except (TypeError, ValueError):
        return None