예제 #1
0
def format_im_fn(meta):
    """Formats an image filename from VCAT image meta.

    :param meta: dict with keys 'sa_hash' (sha256 hex or None), 'fn' (base
        filename), 'ext' (extension including dot), 'frame' (frame index),
        and 'uploaded' (bool)
    :returns: (str) formatted image filename
    """
    # FIX: removed dead locals -- the original computed
    # file_utils.sha256_tree(meta['sa_hash']) into an unused variable
    if meta['uploaded'] or meta['fn'] != 'index':
        if meta['sa_hash'] is not None:
            fn = '{}_{}{}'.format(meta['sa_hash'], meta['fn'], meta['ext'])
        else:
            fn = '{}{}'.format(meta['fn'], meta['ext'])
    else:
        # keyframe-style name: <sha256>_<frame><ext>
        fn = '{}_{}{}'.format(meta['sa_hash'], meta['frame'], meta['ext'])
    # FIX: original returned join(fn); a single-argument join is a no-op
    return fn
예제 #2
0
    def load_images(self,
                    dir_media,
                    opt_size,
                    opt_density,
                    opt_drawframes=False):
        """Loads keyframe images from disk and stores them via set_keyframes.

        :param dir_media: root media dir containing <sha256_tree>/<sha256> dirs
        :param opt_size: image size enum, mapped through cfg.IMAGE_SIZE_LABELS
        :param opt_density: keyframe density passed to get_keyframes()
        :param opt_drawframes: forwarded unchanged to set_keyframes()
        """

        sha256_tree = file_utils.sha256_tree(self._sha256)
        dir_sha256 = join(dir_media, sha256_tree, self._sha256)

        opt_size_label = cfg.IMAGE_SIZE_LABELS[opt_size]

        # get the keyframe status data to check if images available
        try:
            keyframe_status = self.get_metadata(types.Metadata.KEYFRAME_STATUS)
        except Exception:
            # FIX: dropped unused "as ex" binding
            self.log.error(
                'no keyframe metadata. Try: "append -t keyframe_status"')
            return

        keyframes = {}

        # if keyframe images were generated and exist locally
        if keyframe_status and keyframe_status.get_status(opt_size):

            keyframe_metadata = self.get_metadata(types.Metadata.KEYFRAME)

            if not keyframe_metadata:
                self.log.error(
                    'no keyframe metadata. Try: "append -t keyframe"')
                return

            # get keyframe indices
            frame_idxs = keyframe_metadata.get_keyframes(opt_density)

            for frame_idx in frame_idxs:
                # get keyframe filepath
                fp_keyframe = join(dir_sha256, file_utils.zpad(frame_idx),
                                   opt_size_label, 'index.jpg')
                # cv.imread returns None for a missing/unreadable file, so
                # touching .shape raises AttributeError for bad loads
                try:
                    im = cv.imread(fp_keyframe)
                    im.shape  # used to invoke error if file didn't load correctly
                except Exception:
                    # FIX: was a bare "except:", which also swallows
                    # KeyboardInterrupt/SystemExit
                    self.log.warn('file not found: {}'.format(fp_keyframe))
                    # don't add to keyframe dict
                    continue

                keyframes[frame_idx] = im

        # append metadata to chair_item's mapping item
        self.set_keyframes(keyframes, opt_drawframes)
예제 #3
0
def format_im_url(url_base, meta, size='lg'):
    """Formats an image S3 URL from VCAT image meta.

    :param url_base: URL root prepended to the computed path
    :param meta: VCAT image meta dict ('sa_hash', 'fn', 'ext', 'id',
        'frame', 'uploaded', 'verified')
    :param size: size label used for uploaded/named images
    :returns: (str) full image URL
    """
    sha256 = meta['sa_hash']  # sha256
    sha256_tree = file_utils.sha256_tree(sha256)
    is_uploaded_or_named = meta['uploaded'] or meta['fn'] != 'index'
    if is_uploaded_or_named:
        size_fn = '{}{}'.format(size, meta['ext'])
        url_path = join('media/images', str(meta['id']), meta['fn'], size_fn)
    else:
        keyframe_fn = '{}{}'.format(meta['fn'], meta['ext'])
        # NOTE(review): size label is hardcoded to 'lg' on this branch while
        # the other branch honors the size param -- confirm intended
        if meta['verified']:
            prefix = 'v1/media/keyframes'
        else:
            prefix = 'v1/media/keyframes/unverified'
        url_path = join(prefix, sha256_tree, sha256, meta['frame'], 'lg',
                        keyframe_fn)
    return join(url_base, url_path)
예제 #4
0
def cli(ctx, opt_fp_neg, opt_dir_project, opt_disk, opt_size):
  """Generates negative images.

  Symlinks verified and unverified keyframes listed in the negatives CSV
  into the project's negative-images dir, writes an empty YOLO-style label
  file per image, and writes the list of symlinked files to the negative
  training list.
  """

  # ------------------------------------------------
  # imports
  # FIX: removed unused "import os" (only os.path.join is used)
  from os.path import join
  from glob import glob
  from pathlib import Path

  from vframe.utils import logger_utils, im_utils, file_utils
  from vframe.settings.paths import Paths

  log = logger_utils.Logger.getLogger()
  log.debug('negative mining')

  # media roots for verified and unverified keyframes
  dir_media_unver = Paths.media_dir(types.Metadata.KEYFRAME, data_store=opt_disk, verified=types.Verified.UNVERIFIED)
  dir_media_ver = Paths.media_dir(types.Metadata.KEYFRAME, data_store=opt_disk, verified=types.Verified.VERIFIED)
  opt_size_label = cfg.IMAGE_SIZE_LABELS[opt_size]

  fp_train_neg = join(opt_dir_project, vcat_cfg.FP_TRAIN_NEGATIVES)
  dir_labels_negative = join(opt_dir_project, vcat_cfg.DIR_LABELS_NEGATIVE)
  dir_negative = join(opt_dir_project, vcat_cfg.DIR_IMAGES_NEGATIVE)

  file_utils.mkdirs(dir_negative)
  file_utils.mkdirs(dir_labels_negative)

  # NOTE(review): assumes pandas is imported as pd at module level -- confirm
  negative_list = pd.read_csv(opt_fp_neg)
  negative_list['description'] = negative_list['description'].fillna('')  # ensure not empty
  neg_training_files = []

  for i, row in negative_list.iterrows():
    sha256 = row['sha256']
    sha256_tree = file_utils.sha256_tree(sha256)
    ver_list = glob(join(dir_media_ver, sha256_tree, sha256, "*"))
    unver_list = glob(join(dir_media_unver, sha256_tree, sha256, "*"))
    dir_frames = ver_list + unver_list

    log.debug('adding {} frames about "{}"'.format(len(dir_frames), row['description']))

    for dir_frame in dir_frames:
      frame_idx = Path(dir_frame).stem
      fp_keyframe_src = join(dir_frame, opt_size_label, 'index.jpg')
      fpp_keyframe_src = Path(fp_keyframe_src)
      if fpp_keyframe_src.exists():
        # create symlinked image
        fpp_keyframe_dst = Path(join(dir_negative, '{}_{}.jpg'.format(sha256, frame_idx)))
        # FIX: was "exists() and is_symlink()", which left broken symlinks
        # (exists() is False for those) and regular files in place, making
        # symlink_to() below raise FileExistsError on re-runs
        if fpp_keyframe_dst.is_symlink() or fpp_keyframe_dst.exists():
          fpp_keyframe_dst.unlink()
        fpp_keyframe_dst.symlink_to(fpp_keyframe_src)
        # create empty label
        fp_label_txt = join(dir_labels_negative, '{}_{}.txt'.format(sha256, frame_idx))
        with open(fp_label_txt, 'w') as fp:
          fp.write('')
        # and, add this file to the training list
        neg_training_files.append(str(fpp_keyframe_dst))

  # for each keyframe if it exists
  log.info('writing {} lines to: {}'.format(len(neg_training_files), fp_train_neg))
  file_utils.write_text(neg_training_files, fp_train_neg)

  # add prompt
  log.info('mv labels_negative/*.txt labels/')
  log.info('mv images_negative/*.jpg images/')
예제 #5
0
def extract(items, dir_out, dir_videos, keyframe_type, threads=1):
  """Extracts keyframe images from videos and saves a size pyramid of JPEGs.

  :param items: mapping of sha256 -> media item (exposes .metadata and .ext)
  :param dir_out: output root; frames are written to
      <dir_out>/<sha256_tree>/<sha256>/<zero-padded idx>/<size label>/index.jpg
  :param dir_videos: root dir holding <sha256_tree>/<sha256>.<ext> videos
  :param keyframe_type: key used to look up frame indices in keyframe metadata
  :param threads: worker count; >1 switches to the threaded queue pipeline
  """

  task_queue = Queue()
  print_lock = threading.Lock()  # NOTE(review): never used below -- confirm safe to remove
  log = logging.getLogger()

  if threads > 1:

    def thread_processor(task_obj):
      # Worker body: decode the requested frames, then write each size level.
      # NOTE(review): threading.local() is redundant here -- all tl.* values
      # are already per-call function locals; kept as-is to preserve behavior.
      tl = threading.local()
      tl.fp_video = task_obj['fp_video']
      tl.idxs = task_obj['idxs']
      tl.dir_out = task_obj['dir_out']
      tl.sha256_tree = task_obj['sha256_tree']
      tl.sha256 = task_obj['sha256']
      try:
        # decode only the requested frame indices from the video
        tl.frame_ims = im_utils.vid2frames(tl.fp_video, idxs=tl.idxs)
      except Exception as ex:
        logging.getLogger().error('Could not read video file')
        logging.getLogger().error('file: {}'.format(tl.fp_video))
        logging.getLogger().error('sha256: {}'.format(tl.sha256))
        return

      tl.labels = cfg.IMAGE_SIZE_LABELS
      tl.sizes = cfg.IMAGE_SIZES

      # iterate sizes largest-first; each pass resizes the previous pass's
      # frames, so each level is derived from the next-larger one
      for tl.k_label, tl.k_width in zip(reversed(tl.labels), reversed(tl.sizes)):
        tl.label = tl.labels[tl.k_label]
        tl.width = tl.sizes[tl.k_width]
        # pyramid down frame sizes 1280, 640, 320, 160
        try:
          tl.frame_ims = [im_utils.resize(tl.im, width=tl.width) for tl.im in tl.frame_ims]
        except:
          # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit
          logging.getLogger().error('')
          logging.getLogger().error('Could not resize. Bad video or missing file')
          logging.getLogger().error(tl.sha256)
          logging.getLogger().error('')
          return


        for tl.idx, tl.im in zip(tl.idxs, tl.frame_ims):
          # ensure path exists
          tl.zpad = file_utils.zpad(tl.idx)
          tl.fp_dst = join(tl.dir_out, tl.sha256_tree, tl.sha256, tl.zpad, tl.label, 'index.jpg')
          # convert to PIL
          tl.im_pil = im_utils.ensure_pil(tl.im, bgr2rgb=True)
          file_utils.ensure_path_exists(tl.fp_dst)
          tl.im_pil.save(tl.fp_dst, quality=cfg.JPG_SAVE_QUALITY)


    def process_queue(num_items):
      # TODO: progress bar
      # Worker loop: runs forever pulling tasks; threads are daemonized below
      # so they die with the main thread once task_queue.join() returns.
      while True:
        task_obj = task_queue.get()
        thread_processor(task_obj)
        logging.getLogger().info('process: {:.2f}% {:,}/{:,}'.format( 
          (task_queue.qsize() / num_items)*100, num_items-task_queue.qsize(), num_items))
        task_queue.task_done()

    # avoid race conditions by creating dir structure here
    log.info('create directory structure first to avoid race conditions')
    log.info('TODO: this needs to be fixed, thread lock maybe')
    for sha256, item in tqdm(items.items()):
      item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})  # NOTE(review): unused in this loop
      sha256_tree = file_utils.sha256_tree(sha256)
      fp_dst = join(dir_out, sha256_tree)
      file_utils.ensure_path_exists(fp_dst)

    # init threads
    num_items = len(items)
    for i in range(threads):
      t = threading.Thread(target=process_queue, args=(num_items,))
      t.daemon = True
      t.start()

    # process threads
    start = time.time()  # NOTE(review): never read afterwards -- confirm safe to remove
    for sha256, item in items.items():
      sha256_tree = file_utils.sha256_tree(sha256)
      item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
      if not item_metadata:
        continue
      keyframe_data = item_metadata.metadata
      idxs = keyframe_data.get(keyframe_type)
      fp_video = join(dir_videos, sha256_tree, '{}.{}'.format(sha256, item.ext))
      task_obj = {
        'fp_video': fp_video,
        'idxs': idxs,
        'dir_out':dir_out,
        'sha256': sha256,
        'sha256_tree': sha256_tree
        }
      task_queue.put(task_obj)

    # block until every queued task has been marked done
    task_queue.join()

  else:

    # single-threaded path: same pipeline run inline with a tqdm bar
    for sha256, item in tqdm(items.items()):
      item_metadata = item.metadata.get(MetadataType.KEYFRAME, {})
      if not item_metadata:
        continue

      sha256_tree = file_utils.sha256_tree(sha256)
      keyframe_data = item_metadata.metadata

      #idxs_basic = keyframe_data.get(KeyframeMetadataType.BASIC)
      #idxs_dense = keyframe_data.get(KeyframeMetadataType.DENSE)
      #idxs_expanded = keyframe_data.get(KeyframeMetadataType.EXPANDED)

      # fetches the metadata by the enum type from the custom click param
      idxs = keyframe_data.get(keyframe_type)

      # get frames from video
      fp_video = join(dir_videos, sha256_tree, '{}.{}'.format(sha256, item.ext))
      frame_ims = im_utils.vid2frames(fp_video, idxs=idxs)
      labels = cfg.IMAGE_SIZE_LABELS
      sizes = cfg.IMAGE_SIZES
      for k_label, k_width in zip(reversed(labels), reversed(sizes)):
        label = labels[k_label]
        width = sizes[k_width]
        # pyramid down frame sizes 1280, 640, 320, 160
        frame_ims = [im_utils.resize(im, width=width) for im in frame_ims]

        for idx, im in zip(idxs, frame_ims):
          # ensure path exists
          zpad = file_utils.zpad(idx)
          fp_dst = join(dir_out, sha256_tree, sha256, zpad, label, 'index.jpg')
          # convert to PIL
          im_pil = im_utils.ensure_pil(im, bgr2rgb=True)
          file_utils.ensure_path_exists(fp_dst)
          im_pil.save(fp_dst, quality=cfg.JPG_SAVE_QUALITY)
예제 #6
0
def cli(ctx, sink, opt_disk, opt_density):
    """Generates KeyframeStatus metadata.

    Coroutine sink processor: receives chair items via yield, checks which
    keyframe image sizes exist on disk, attaches a KeyframeStatusMetadataItem,
    and forwards each item to `sink`.
    """
    # Recommended: Use Expanded density to check for all keyframes

    # -------------------------------------------------
    # imports
    # FIX: removed unused "import os" (only os.path.join is used)

    from os.path import join
    from pathlib import Path

    from vframe.settings.paths import Paths
    from vframe.settings import vframe_cfg as cfg
    from vframe.utils import file_utils, logger_utils

    from vframe.models.metadata_item import KeyframeStatusMetadataItem

    # -------------------------------------------------
    # process

    log = logger_utils.Logger.getLogger()

    # set paths
    media_type = types.Metadata.KEYFRAME
    metadata_type = types.Metadata.KEYFRAME_STATUS
    dir_keyframes = Paths.media_dir(media_type,
                                    data_store=opt_disk,
                                    verified=ctx.opts['verified'])

    # iterate sink
    while True:
        chair_item = yield
        sha256 = chair_item.sha256
        sha256_tree = file_utils.sha256_tree(sha256)
        dir_parent = join(dir_keyframes, sha256_tree, sha256)

        # check if keyframe metadata exists
        keyframe_metadata_item = chair_item.item.get_metadata(
            types.Metadata.KEYFRAME)
        if not keyframe_metadata_item:
            log.error(
                'no keyframe metadata. try "append -t keyframe", {}'.format(
                    keyframe_metadata_item))
            chair_item.item.set_metadata(metadata_type, {})
        else:
            # check if the keyframes images exist
            # one bool per size label: True if at least one keyframe image
            # exists on disk at that size
            status = {k: False for k in cfg.IMAGE_SIZE_LABELS}
            # NOTE(review): when dir_parent is missing, no status metadata is
            # set at all (unlike the missing-metadata branch) -- confirm intended
            if Path(dir_parent).exists():

                # get keyframe numbers
                idxs = keyframe_metadata_item.get_keyframes(opt_density)

                for idx in idxs:
                    # FIX: removed dead list comprehension that rebuilt an
                    # unused "labels" list on every iteration
                    for k, label in cfg.IMAGE_SIZE_LABELS.items():
                        fpp_im = Path(dir_parent, file_utils.zpad(idx), label,
                                      'index.jpg')
                        if fpp_im.exists():
                            status[k] = True

                # append metadata to chair_item's mapping item
                chair_item.item.set_metadata(
                    metadata_type, KeyframeStatusMetadataItem(status))

        # -------------------------------------------------
        # continue processing other items

        sink.send(chair_item)
예제 #7
0
def cli(ctx, sink, opt_disk, opt_format, opt_metadata_tree_type):
  """Collate deprecated metadata tree files.

  Coroutine processor: accumulates all chair items, loads each item's
  index.json from the metadata tree, attaches the parsed metadata item
  (or the empty dict when missing), then re-emits every item to `sink`.
  """

  # -------------------------------------------------
  # imports

  import click
  from pathlib import Path
  from tqdm import tqdm

  from vframe.settings import vframe_cfg as cfg
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils
  from vframe.models.metadata_item import MediainfoMetadataItem, KeyframeMetadataItem

  from cli_vframe import processor


  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()

  # map tree type -> metadata type (FIX: renamed local "metdata_type" typo and
  # fail fast with ValueError instead of a later NameError on unknown types)
  if opt_metadata_tree_type == types.MetadataTree.MEDIAINFO_TREE:
    metadata_type = types.Metadata.MEDIAINFO
  elif opt_metadata_tree_type == types.MetadataTree.KEYFRAME_TREE:
    metadata_type = types.Metadata.KEYFRAME
  else:
    raise ValueError('{} is not a valid metadata type'.format(opt_metadata_tree_type))

  dir_metadata = Paths.metadata_tree_dir(opt_metadata_tree_type, data_store=opt_disk)

  # accumulate chair items
  chair_items = []
  while True:
    try:
      chair_items.append( (yield) )
    except GeneratorExit as ex:
      break

  skipped = []
  num_skipped = 0
  found = []
  num_found = 0

  # iterate chair items and gather metadata index.json files
  for chair_item in tqdm(chair_items):
    item = chair_item.item
    sha256 = item.sha256
    sha256_tree = file_utils.sha256_tree(sha256)
    fpp_metadata = Path(dir_metadata, sha256_tree, sha256, 'index.json')

    # skip if not existing
    metadata = {}
    if fpp_metadata.exists():
      try:
        metadata = file_utils.lazyload(fpp_metadata)
      except Exception as ex:
        log.error('could not read json: {}, ex: {}'.format(str(fpp_metadata), ex))
        continue

    # count items skipped
    if not metadata:
      skipped.append(fpp_metadata)
      num_skipped = len(skipped)
      per = num_skipped / (num_found + num_skipped) * 100
      log.debug('{:.2f}% ({:,}/{:,}) missing'.format(per, num_skipped, (num_found + num_skipped)))
      # FIX: originally passed "metadata_obj", which is undefined on the first
      # missing item (NameError) and stale from a previous item afterwards;
      # attach the empty metadata dict instead
      chair_item.item.set_metadata(metadata_type, metadata)
    else:
      found.append(fpp_metadata)
      num_found = len(found)
      # construct and append metadata
      if metadata_type == types.Metadata.MEDIAINFO:
        metadata_obj = MediainfoMetadataItem.from_index_json(metadata)
        chair_item.item.set_metadata(metadata_type, metadata_obj)
      elif metadata_type == types.Metadata.KEYFRAME:
        metadata_obj = KeyframeMetadataItem.from_index_json(metadata)
        chair_item.item.set_metadata(metadata_type, metadata_obj)
      else:
        raise ValueError('{} is not a valid metadata type'.format(metadata_type))

  log.info('skipped: {:,} items'.format(len(skipped)))


  # -------------------------------------------------
  # rebuild

  for chair_item in chair_items:
    sink.send(chair_item)