def tagging_function(file_name, label_file, images, size, collection):
    """Attach the annotation files that belong to one uploaded image.

    Every entry of ``label_file`` whose name equals ``<stem>.<ext>`` is
    dispatched on ``<ext>``: ``xml`` goes to ``clf.segmentation``, ``json``
    goes to ``cjf.segmentation`` and any other extension falls back to
    ``clf.default``.  ``<stem>`` is the first dot-separated field of
    ``file_name`` and ``<ext>`` the second dot-separated field of the
    annotation name.  Entries that do not match are skipped.

    :param file_name: name of the uploaded image, e.g. ``cat.jpg``
    :param label_file: mapping of annotation file name -> base64 payload
    :param images: value handed through to the tagging call
    :param size: value handed through to the tagging call
    :param collection: value handed through to ``clf.segmentation``
    :return: None
    """
    # The stem does not depend on the annotation being inspected.
    image_stem = file_name.split('.')[0]

    for l_name, l_file in label_file.items():
        name_fields = l_name.split('.')
        if len(name_fields) < 2:
            # A name without an extension can never equal "<stem>.<ext>";
            # indexing field 1 here used to raise IndexError.
            continue

        extension = name_fields[1]
        if image_stem + '.' + extension != l_name:
            continue

        if extension == 'xml':
            clf.segmentation(images, size, base64.b64decode(l_file),
                             collection)
        elif extension == 'json':
            cjf.segmentation(images, size, base64.b64decode(l_file))
        else:
            clf.default(images, size)
def spider(spider_dict):
    """Crawl images, upload them and register them in a collection.

    The crawl itself is delegated to ``spider_task``; every returned URL is
    downloaded and pushed through ``fun.upload``.  Each upload result is then
    stored as an ``Image`` row, measured with ``image_size`` and tagged with
    ``clf.default``.  Finally ``same_images_clean`` runs for the collection.

    :param spider_dict: JSON string; it is forwarded unchanged to
        ``spider_task`` and decoded here to read ``collection_id``.  The
        original comment lists the keys ``keyword``, ``spider_page_num``,
        ``start_page`` and ``number``.
    :return: list of the values returned by ``fun.upload``
    """
    from contextlib import closing

    from DataSet.rabbitMQ.task_queue_client import spider_task

    with app.app_context():
        # Decode once, before any work: the id used to be bound only inside
        # the storage loop, so a crawl that found nothing raised NameError.
        collection_id = json.loads(spider_dict)['collection_id']
        current_app.logger.debug('spider request: %s', spider_dict)

        image_urls = json.loads(spider_task(spider_dict))
        current_app.logger.debug('spider result: %s', image_urls)

        site_list = []
        for image_url in image_urls:
            # urllib2 responses are not context managers; close explicitly.
            with closing(urllib2.urlopen(image_url)) as response:
                payload = response.read()
            site_list.append(fun.upload(payload, file_ext_name='jpg'))

        # Persist one Image row per uploaded file.
        for uploaded in site_list:
            image = Image()
            image.name = uploaded.get('filename')
            image.site = uploaded.get('file_id')
            image.collection_id = collection_id
            db.session.add(image)
            db.session.commit()

            images = Image.query.filter_by(name=image.name).first()
            image_url_path = (current_app.config['NGINX_SITE'] +
                              fun.getInfo(images.site)['group_file_id'])
            with closing(urllib2.urlopen(image_url_path)) as response:
                size = image_size(response.read())
            clf.default(images, size)

        same_images_clean(collection_id)
        return site_list
def upload_image(collection_name):
    """Upload images into a classification / face data set.

    The ``status`` form field selects the behaviour:

    * ``add_label`` -- create a label from the ``new_label`` form field,
      formatted ``<id>:<name>``;
    * ``appoint_table`` -- label every image through the ``table_site``
      file, whose lines read ``<image file name>:<label name>``; images
      without a usable entry are stored unlabelled;
    * ``default`` -- store every image unlabelled;
    * ``<id>:<name>`` -- store every image under that existing label.

    :param collection_name: name of a collection owned by ``g.user``
    :return: JSON response keyed ``err_no`` / ``err_desc`` (the "missing
        parameter" and "wrong label id" answers keep their historical
        ``errno`` / ``errmsg`` keys)
    """
    local_time = time.time()
    try:
        # Collection the upload goes into.
        collection = Collection.query.filter_by(
            user_id=g.user.id, name=collection_name).first()
    except Exception as e:
        current_app.logger.error(e)
        return jsonify(err_no=RET.PARAMERR, err_desc='参数有误')
    if collection is None:
        return jsonify(err_no=RET.PARAMERR, err_desc='没有该数据集')

    # .get() so that a missing field yields the JSON error below instead of
    # the framework's bare 400 response.
    status = request.form.get('status')
    if not status:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数不完整')

    upload_image_site = request.files.getlist('upload_image_site')  # images
    table_site = request.files.get('table_site')  # image -> label table
    label_id = [label.label_id for label in collection.labels]
    label_name = [label.name.encode('utf-8') for label in collection.labels]

    if status == 'add_label':
        new_label = request.form.get('new_label')
        label_fields = new_label.split(':') if new_label else []
        if len(label_fields) < 2:
            # Missing, or not of the form "<id>:<name>".
            return jsonify(err_no=RET.NODATA, err_desc='名称参数缺失')
        new_id, new_name = label_fields[0], label_fields[1]
        # NOTE(review): new_id is form text; if Label.label_id is numeric
        # (the label branch below compares it against int(...)) this id
        # check can never match -- confirm the column type.
        if new_id in label_id or new_name in label_name:
            return jsonify(err_no=RET.NODATA, err_desc='该标签或id以存在')
        try:
            label = Label()
            label.name = new_name
            label.label_id = new_id
            label.collection_id = collection.id
            db.session.add(label)
            db.session.commit()
        except Exception as e:
            current_app.logger.error(e)
            db.session.rollback()
            # A failed commit used to be reported as success.
            return jsonify(err_no=RET.DBERR, err_desc='标签保存失败')
        return jsonify(err_no='ok')

    if not upload_image_site:
        return jsonify(errno=RET.PARAMERR, errmsg='参数缺失')

    if status == 'appoint_table':
        if table_site is None:
            return jsonify(errno=RET.PARAMERR, errmsg='参数缺失')
        try:
            # Exact-name lookup table; the first entry for a name wins and
            # lines without ':' are ignored.
            label_by_image = {}
            for line in table_site.read().splitlines():
                image_name, separator, label_text = line.partition(':')
                if separator:
                    label_by_image.setdefault(image_name, label_text)

            for up_file in upload_image_site:
                images = storage(up_file, collection)
                size = image_size(up_file)
                wanted = label_by_image.get(up_file.filename)
                labels = None
                if wanted is not None:
                    labels = Label.query.filter_by(
                        collection_id=collection.id, name=wanted).first()
                if labels is not None:
                    clf.classification(images, size, labels.label_id,
                                       labels.name)
                else:
                    # Not listed, or listed with an unknown label.
                    clf.default(images, size)
        except Exception as e:
            current_app.logger.error(e)
            db.session.rollback()
            return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')
        return jsonify(err_no=RET.OK, err_desc='OK')

    if status == 'default':
        try:
            for up_file in upload_image_site:
                images = storage(up_file, collection)
                size = image_size(up_file)
                clf.default(images, size)
        except Exception as e:
            current_app.logger.error(e)
            db.session.rollback()
            return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')
        return jsonify(err_no=RET.OK, err_desc='OK')

    # Anything else must be "<id>:<name>" naming a label of this collection.
    status_fields = status.split(':')
    if len(status_fields) < 2 or status_fields[1] not in label_name:
        return jsonify(err_no=RET.DBERR, err_desc='无效参数')
    label_id_text, label_text = status_fields[0], status_fields[1]

    try:
        label_data = Label.query.filter_by(
            collection_id=collection.id, name=label_text).first()
        if label_data is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='没有此标签')
        if int(label_id_text) != label_data.label_id:
            return jsonify(errno=RET.PARAMERR, errmsg='错误的标签ID')
        for up_file in upload_image_site:
            images = storage(up_file, collection)
            size = image_size(up_file)
            clf.classification(images, size, label_id_text, label_text)
    except Exception as e:
        current_app.logger.error(e)
        db.session.rollback()
        return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')

    # The cookie may be absent; slicing None used to fail after the images
    # had already been stored.
    request_id = (request.cookies.get('session') or '')[:36]
    time_used = int((time.time() * 1000) - int(local_time * 1000))
    return jsonify(err_no=RET.OK, err_desc='OK', dataset_id=collection.id,
                   request_id=request_id, time_used=time_used)
def upload_images(collection_name):
    """Upload images into a detection / segmentation data set.

    The ``status`` form field selects the behaviour:

    * ``default`` -- store every image unlabelled;
    * absent -- pair each image with the uploaded ``label_file`` entries
      named ``<image stem>.<ext>``; images without such a file are stored
      unlabelled;
    * ``appoint_table`` -- only images that the ``table_site`` file maps to
      an uploaded label file are paired; its lines read
      ``<image file name>:<label file name>``.

    Any other status is rejected.

    :param collection_name: name of a collection owned by ``g.user``
    :return: JSON response keyed ``err_no`` / ``err_desc`` (the "missing
        parameter" answers keep their historical ``errno`` / ``errmsg``
        keys)
    """
    local_time = time.time()
    try:
        # Collection the upload goes into.
        collection = Collection.query.filter_by(
            user_id=g.user.id, name=collection_name).first()
    except Exception as e:
        current_app.logger.error(e)
        return jsonify(err_no=RET.PARAMERR, err_desc='参数有误')
    if collection is None:
        return jsonify(err_no=RET.PARAMERR, err_desc='没有该数据集')

    status = request.form.get('status')
    upload_image_site = request.files.getlist('upload_image_site')  # images
    label_file = request.files.getlist('label_file')  # annotation files
    table_site = request.files.get('table_site')  # image -> label table

    if status == 'default':
        if not upload_image_site:
            return jsonify(errno=RET.PARAMERR, errmsg='参数缺失')
        try:
            for up_file in upload_image_site:
                images = storage(up_file, collection)
                size = image_size(up_file)
                clf.default(images, size)
        except Exception as e:
            current_app.logger.error(e)
            db.session.rollback()
            return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')
        return jsonify(err_no=RET.OK, err_desc='OK')

    # Every remaining mode stores images, so a request without any is
    # rejected instead of answering OK having done nothing.
    if not upload_image_site:
        return jsonify(err_no=RET.PARAMERR, err_desc='缺少图片文件或标记文件')

    # Annotation names with extension, and their stems.
    label_file_name = [l_file.filename for l_file in label_file]
    label_list = [name.split('.')[0] for name in label_file_name]

    if status is None:
        # Labelled upload without a lookup table: pair by file stem.
        for up_file in upload_image_site:
            try:
                images = storage(up_file, collection)
                size = image_size(up_file)
                if up_file.filename.split('.')[0] in label_list:
                    _apply_uploaded_labels(up_file, label_file, images,
                                           size, collection)
                else:
                    clf.default(images, size)
            except Exception as e:
                current_app.logger.error(e)
                db.session.rollback()
                return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')
        return jsonify(err_no=RET.OK, err_desc='OK')

    if not status:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数不完整')
    if status != 'appoint_table':
        # Same answer as the classification upload view gives.
        return jsonify(err_no=RET.DBERR, err_desc='无效参数')
    if not table_site:
        return jsonify(errno=RET.PARAMERR, errmsg='参数缺失')

    try:
        # Exact-name lookup table.  Blank or colon-less lines (such as the
        # one a trailing newline produces) are skipped; they used to raise
        # IndexError and fail the whole upload.
        label_file_by_image = {}
        for line in table_site.read().splitlines():
            image_name, separator, mapped_name = line.partition(':')
            if separator:
                label_file_by_image.setdefault(image_name, mapped_name)

        for up_file in upload_image_site:
            images = storage(up_file, collection)
            size = image_size(up_file)
            mapped_name = label_file_by_image.get(up_file.filename)
            if mapped_name is not None and mapped_name in label_file_name:
                _apply_uploaded_labels(up_file, label_file, images, size,
                                       collection)
            else:
                clf.default(images, size)
    except Exception as e:
        current_app.logger.error(e)
        db.session.rollback()
        return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')

    # The cookie may be absent; slicing None used to fail after the images
    # had already been stored.
    request_id = (request.cookies.get('session') or '')[:36]
    time_used = int((time.time() * 1000) - int(local_time * 1000))
    return jsonify(err_no=RET.OK, err_desc='OK', dataset_id=collection.id,
                   request_id=request_id, time_used=time_used)


def _apply_uploaded_labels(up_file, label_file, images, size, collection):
    """Tag ``images`` with each label file named ``<image stem>.<ext>``."""
    image_stem = up_file.filename.split('.')[0]
    for l_file in label_file:
        name_fields = l_file.filename.split('.')
        if len(name_fields) < 2:
            # No extension: cannot equal "<stem>.<ext>".
            continue
        extension = name_fields[1]
        if image_stem + '.' + extension != l_file.filename:
            continue
        if extension == 'xml':
            clf.segmentation(images, size, l_file, collection)
        elif extension == 'json':
            cjf.segmentation(images, size, l_file)
        else:
            clf.default(images, size)
def upload_image():
    """Configure and run an image crawl for a collection.

    Reads a JSON body with ``collection_id``, ``number`` and either
    ``keywords`` (``;``-separated) or ``image``.  For every keyword a
    ``spider`` task is started and awaited; each entry of its result is
    stored as an ``Image`` row, measured with ``image_size`` and tagged with
    ``clf.default``.  ``same_images_clean`` is queued afterwards.  The
    ``image`` (search-by-image) path only validates its input.

    :return: JSON response keyed ``err_no`` / ``err_desc``
    """
    from contextlib import closing

    from DataSet.celery_tasks.tasks import spider, same_images_clean

    data = request.get_json()
    current_app.logger.debug('spider config: %s', data)
    if not data:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')

    collection_id = data.get('collection_id')
    if collection_id is None:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')

    try:
        # .first() must be *called*: the bare bound method is never None,
        # so the existence check below could not fire.
        collection = Collection.query.filter_by(id=collection_id).first()
    except Exception as e:
        current_app.logger.error(e)
        return jsonify(err_no=RET.PARAMERR, err_desc='参数有误')
    if collection is None:
        return jsonify(err_no=RET.PARAMERR, err_desc='没有该数据集')

    # Validate before converting: int(None) used to raise ahead of the
    # emptiness check.
    try:
        number = int(data.get('number'))
    except (TypeError, ValueError):
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
    if number <= 0:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
    spider_page_num = number // 60 + 1

    keywords = data.get('keywords')
    if keywords:
        # Search by keyword.
        for keyword in keywords.split(';'):
            if not keyword:
                # "a;;b" or a trailing ';' yields empty fields.
                continue
            spider_dict = {
                'keyword': keyword,
                'spider_page_num': spider_page_num,
                'start_page': 1,
                'number': number,
                'collection_id': collection_id,
            }
            spider_save_list = spider.delay(json.dumps(spider_dict)).get()
            current_app.logger.debug('spider saved: %s', spider_save_list)

            # NOTE(review): if the imported task already persists its
            # images, this loop stores each of them a second time --
            # confirm against the task implementation.
            for spider_file in spider_save_list:
                image = Image()
                image.name = spider_file.get('filename')
                image.site = spider_file.get('file_id')
                image.collection_id = collection_id
                db.session.add(image)
                db.session.commit()

                images = Image.query.filter_by(name=image.name).first()
                image_url_path = (current_app.config['NGINX_SITE'] +
                                  fun.getInfo(images.site)['group_file_id'])
                # urllib2 responses are not context managers.
                with closing(urllib2.urlopen(image_url_path)) as response:
                    size = image_size(response.read())
                clf.default(images, size)

        same_images_clean.delay(int(collection_id))
    else:
        # Search by image.
        image = data.get('image')
        current_app.logger.debug('spider image: %s', image)
        if not image:
            return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')

    return jsonify(err_no=RET.OK, err_desc='爬取结束')
def classification(status, table_site, upload_image_site, collection,
                   label_name):
    """Store and label images for a classification / face data set.

    ``status`` selects the behaviour:

    * ``appoint_table`` -- label each image through ``table_site``, whose
      lines read ``<image file name>:<label name>``; images without a usable
      entry are stored unlabelled;
    * ``default`` -- store every image unlabelled;
    * ``<id>:<name>`` with ``<name>`` in ``label_name`` -- store every image
      under that label;
    * anything else -- every image is removed again right after storage.

    A failure while tagging one image is logged and recorded and the
    remaining images are still processed.

    :param status: mode selector, see above
    :param table_site: text of the lookup table, or None
    :param upload_image_site: mapping of file name -> base64 image payload
    :param collection: collection id
    :param label_name: label names accepted for the ``<id>:<name>`` mode
    :return: JSON-like string with ``err_no`` / ``err_desc`` and, on
        completion, the upload and failure counts and the recorded errors
    """
    with app.app_context():
        collections = Collection.query.filter_by(id=collection).first()
        error_list = []

        # The mode is the same for every image, so resolve it once.  A
        # status without ':' used to raise IndexError outside any try.
        status_fields = status.split(':')
        if status in ('appoint_table', 'default'):
            mode = status
        elif len(status_fields) > 1 and status_fields[1] in label_name:
            mode = 'label'
        else:
            mode = None

        # Validate before anything is stored; these checks used to run only
        # after the first image had been saved, leaving it orphaned.
        label_by_image = {}
        if upload_image_site:
            if mode == 'appoint_table':
                if table_site is None:
                    return '{"err_no": "1", "err_desc": "参数缺失"}'
                # Exact-name lookup; the first entry for a name wins and
                # lines without ':' are ignored.
                for line in table_site.splitlines():
                    image_name, separator, label_text = line.partition(':')
                    if separator:
                        label_by_image.setdefault(image_name, label_text)
            elif mode == 'label':
                label_data = Label.query.filter_by(
                    collection_id=collection,
                    label_id=status_fields[0]).first()
                if label_data is None:
                    return '{"err_no": "1", "err_desc": "没有此标签"}'
                if status_fields[1] != label_data.name:
                    return '{"err_no": "1", "err_desc": "错误的标签ID"}'

        for file_name, up_file in upload_image_site.items():
            up_file = base64.b64decode(up_file)
            images = storage(up_file, collections, file_name)
            size = image_size(up_file)

            if mode == 'appoint_table':
                try:
                    wanted = label_by_image.get(file_name)
                    labels = None
                    if wanted is not None:
                        labels = Label.query.filter_by(
                            collection_id=collection, name=wanted).first()
                    if labels is not None:
                        clf.classification(images, size, labels.label_id,
                                           labels.name)
                    else:
                        # Not listed, or listed with an unknown label.
                        clf.default(images, size)
                except Exception as e:
                    fun.remove(images.site)
                    current_app.logger.error(e)
                    error_list.append(e)
            elif mode == 'default':
                try:
                    clf.default(images, size)
                except Exception as e:
                    current_app.logger.error(e)
                    fun.remove(images.site)
                    error_list.append(e)
            elif mode == 'label':
                try:
                    clf.classification(images, size, status_fields[0],
                                       status_fields[1])
                except Exception as e:
                    fun.remove(images.site)
                    current_app.logger.error(e)
                    error_list.append(e)
                    # NOTE(review): only this mode also deletes the row on
                    # failure; the two modes above leave it in place --
                    # confirm which clean-up is intended.
                    db.session.delete(images)
                    db.session.commit()
            else:
                # Unrecognised status: undo the storage of this image.
                fun.remove(images.site)
                db.session.delete(images)
                db.session.commit()

        same_images_clean(collection)
        return ('{"err_no": "0", "err_desc": "OK", "上传张数": "%s", '
                '"失败张数": "%s", "错误名称": "%s"}') % \
            (len(upload_image_site), len(error_list), error_list)
def detection(table_site, upload_image_site, label_file, collection):
    """Store and label images for a detection / segmentation data set.

    The mode follows from which arguments are non-empty and is reported as
    ``method`` in the result:

    * ``ap_table`` -- ``table_site`` and ``label_file`` given: an image is
      tagged when the table maps it to one of the label files;
    * ``label`` -- only ``label_file`` given: an image is tagged when a
      label file shares its stem;
    * ``no_label`` -- neither given: every image is stored unlabelled;
    * ``default`` -- there were no images at all.

    Images that are not tagged are passed to ``clf.default``.  A failure
    while tagging one image is logged and recorded and the remaining images
    are still processed.

    :param table_site: text of the lookup table, lines read
        ``<image file name>:<label file name>``
    :param upload_image_site: mapping of file name -> base64 image payload
    :param label_file: mapping of label file name -> base64 payload
    :param collection: collection id
    :return: JSON-like string with ``err_no`` / ``err_desc`` and, on
        completion, the upload and failure counts, the recorded errors and
        the method used
    """
    with app.app_context():
        collection_id = collection
        collection = Collection.query.filter_by(id=collection).first()

        label_file_name = []  # label names, with extension
        label_list = []  # label names, first dot-separated field only
        label_file_by_image = {}  # lookup table: image name -> label name
        error_list = []
        method = 'default'

        # Every input is constant across images, so pick the mode once and
        # do it before anything is stored.
        if upload_image_site:
            if not collection:
                # Used to be reported only after the first storage() call.
                return '{"err_no": "1", "err_desc": "参数缺失"}'
            if label_file:
                for l_name in label_file:
                    label_file_name.append(l_name)
                    label_list.append(l_name.split('.')[0])
            if table_site and label_file:
                method = 'ap_table'
                # Blank or colon-less lines (such as the one a trailing
                # newline produces) are skipped; they used to raise
                # IndexError and cost the first image.
                for line in table_site.splitlines():
                    image_name, separator, mapped_name = line.partition(':')
                    if separator:
                        label_file_by_image.setdefault(image_name,
                                                       mapped_name)
            elif label_file:
                method = 'label'
            else:
                method = 'no_label'

        for file_name, up_file in upload_image_site.items():
            up_file = base64.b64decode(up_file)
            images = storage(up_file, collection, file_name)
            size = image_size(up_file)
            try:
                if method == 'ap_table':
                    mapped_name = label_file_by_image.get(file_name)
                    has_label = (mapped_name is not None and
                                 mapped_name in label_file_name)
                elif method == 'label':
                    has_label = file_name.split('.')[0] in label_list
                else:
                    has_label = False

                if has_label:
                    tagging_function(file_name, label_file, images, size,
                                     collection)
                else:
                    clf.default(images, size)
            except Exception as e:
                fun.remove(images.site)
                current_app.logger.error(e)
                error_list.append(e)

        same_images_clean(collection_id)
        return ('{"err_no": "0", "err_desc": "OK", "上传张数": "%s", '
                '"失败张数": "%s", "错误名称": "%s", "method":"%s"}') % \
            (len(upload_image_site), len(error_list), error_list, method)