def assign_tags(video_id):
    """Annotate every frame of a video with OpenImages tags and store each
    result as a full-frame Region annotation."""
    import os
    import sys
    import json
    import logging
    import django
    # Make the project importable and configure Django before touching models.
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video, Frame, Region
    from dvalib import entity, annotator
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.filter(video=dv)
    v = entity.WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    # Wrap each database Frame in a WFrame so the annotator can locate the image on disk.
    wframes = {df.pk: entity.WFrame(video=v, frame_index=df.frame_index, primary_key=df.pk)
               for df in frames}
    algorithm = annotator.OpenImagesAnnotator()
    logging.info("starting annotation {}".format(algorithm.name))
    for frame_pk, wf in wframes.items():
        tags = algorithm.apply(wf.local_path())
        a = Region()
        a.region_type = Region.ANNOTATION
        a.frame_id = frame_pk
        a.video_id = video_id
        a.object_name = "OpenImagesTag"
        # Keep only tags scoring above the 0.1 confidence threshold.
        a.metadata_text = " ".join(tag for tag, score in tags.items() if score > 0.1)
        a.metadata_json = json.dumps({tag: 100.0 * score for tag, score in tags.items() if score > 0.1})
        a.full_frame = True
        a.save()
        print(a.metadata_text)
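
# A minimal, self-contained sketch of the thresholding step above: assign_tags()
# keeps only annotator scores above 0.1, storing the surviving tag names as
# space-separated text and the scores (scaled to percentages) as JSON.
# The scores dict below is made up for illustration, not real annotator output.
def _example_tag_filtering():
    import json
    scores = {"cat": 0.92, "indoor": 0.40, "blurry": 0.03}  # hypothetical algorithm.apply() result
    text = " ".join(tag for tag, score in scores.items() if score > 0.1)
    payload = json.dumps({tag: 100.0 * score for tag, score in scores.items() if score > 0.1})
    return text, payload  # ("cat indoor", '{"cat": 92.0, "indoor": 40.0}')
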
def recognize_text(video_pk):
    """
    Recognize text in regions named CTPN_TEXTBOX using a pre-trained CRNN model
    :param video_pk: primary key of the video whose text boxes should be transcribed
    :return:
    """
    setup_django()
    from dvaapp.models import Region
    from django.conf import settings
    from PIL import Image
    import torch
    from torch.autograd import Variable
    import dvalib.crnn.utils as utils
    import dvalib.crnn.dataset as dataset
    import dvalib.crnn.models.crnn as crnn
    video_pk = int(video_pk)
    model_path = '/root/DVA/dvalib/crnn/data/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    # CRNN(imgH=32, nc=1 grayscale channel, nclass=37 = alphabet + CTC blank, nh=256 hidden units)
    model = crnn.CRNN(32, 1, 37, 256, 1)
    model.load_state_dict(torch.load(model_path))
    model.eval()  # inference only; set once rather than on every iteration
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    for r in Region.objects.filter(video_id=video_pk, object_name='CTPN_TEXTBOX'):
        # Each CTPN detection is stored as a cropped JPEG under the video's detections directory.
        img_path = "{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_pk, r.pk)
        image = Image.open(img_path).convert('L')
        image = transformer(image)
        image = image.view(1, *image.size())  # add a batch dimension
        image = Variable(image)
        preds = model(image)
        # Greedy decoding: argmax over the class dimension at each timestep
        # (older PyTorch, where max() keeps the reduced dim), then flatten
        # to a 1-D label sequence for the CTC converter.
        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        # Persist the transcription as a new annotation with the same geometry as the text box.
        dr = Region()
        dr.video_id = r.video_id
        dr.object_name = "CRNN_TEXT"
        dr.x = r.x
        dr.y = r.y
        dr.w = r.w
        dr.h = r.h
        dr.region_type = Region.ANNOTATION
        dr.metadata_text = sim_pred
        dr.frame_id = r.frame_id
        dr.save()
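
# Minimal invocation sketch. Assumptions: this file lives inside the DVA project,
# the video with primary key 1 has already been ingested and its frames extracted,
# and the CTPN detector has already produced CTPN_TEXTBOX regions (with crops under
# MEDIA_ROOT/<video>/detections/) before recognize_text runs. The id 1 is a
# hypothetical example value, not something defined above.
if __name__ == "__main__":
    assign_tags(1)       # full-frame OpenImages tag annotations
    recognize_text(1)    # CRNN transcriptions for detected text boxes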