Exemplo n.º 1
0
    def __format_frame_for_update(self, frame_number: int,
                                  parent_input: Input):
        """
        Build a frame-level Input for an update operation.

        The frame file may or may not exist yet; downstream processing
        resolves the frame via parent_file_id + frame_number.
        """
        frame_number = int(frame_number)  # cast to avoid future problems

        frame_input = Input.new(
            project=None,  # required arg, but project_id is set below (detached session)
            media_type="frame")

        # Avoids detached-session issues for parallel processing
        frame_input.project_id = self.project.id
        if parent_input.mode == 'update_with_existing':
            frame_input.mode = 'update_with_existing'
        else:
            frame_input.mode = "update"
        frame_input.parent_input_id = parent_input.id
        # Assume downstream process will use this to get frame
        frame_input.parent_file_id = parent_input.file.id
        frame_input.frame_number = frame_number
        frame_input.video_parent_length = self.highest_frame_encountered

        # Returns input because it does formatting too, TODO adjust function name
        return self.get_instance_list_from_packet_map(
            input=frame_input, frame_number=frame_number)
Exemplo n.º 2
0
 def generate_sample_files_for_dataset(self, dataset):
     """
     Seed a dataset with sample images fetched from picsum.photos.

     Skips seeding when the dataset already contains at least
     NUM_IMAGES image/video files. Commits the session at the end so
     the new files are queryable right away.
     """
     NUM_IMAGES = 3
     # NOTE: a NUM_VIDEOS constant used to exist here but was never
     # used — sample videos are not generated yet.
     files_list_count = WorkingDirFileLink.file_list(
         self.session,
         working_dir_id=dataset.id,
         root_files_only=True,  # TODO do we need to get child files too?
         limit=None,
         counts_only=True,
         type=['image', 'video'])
     if files_list_count >= NUM_IMAGES:
         return
     for i in range(0, NUM_IMAGES):
         diffgram_input = Input(project_id=dataset.project_id,
                                url='https://picsum.photos/1000',
                                media_type='image',
                                directory_id=dataset.id,
                                type='from_url')
         self.session.add(diffgram_input)
         self.session.flush()  # assigns diffgram_input.id for Process_Media
         process_media = Process_Media(session=self.session,
                                       input_id=diffgram_input.id,
                                       input=diffgram_input,
                                       item=None)
         process_media.main_entry()
     # Commit right away for future querying.
     commit_with_rollback(self.session)
Exemplo n.º 3
0
    def new_input(self):
        """
        Create and flush the Input record for a split video clip.

        Careful for video_split_duration... shouldn't add it
        since we condition on it.

        NOTE here we have self.parent_input available
        as stored on Video Preprocess class but parent_input is not available
        on a generic input object see notes in Input class
        """
        parent = self.parent_input

        self.input = Input.new(
            parent_input_id=parent.id,
            project=self.project,
            media_type="video",
            type="from_video_split",
            job_id=parent.job_id,
            directory_id=parent.directory_id)

        self.session.add(self.input)
        self.session.flush()  # need the generated id for the blob path

        # Do we need .mp4 on end here?
        self.input.raw_data_blob_path = "{}{}/raw/{}".format(
            settings.PROJECT_VIDEOS_BASE_DIR, self.project.id, self.input.id)

        self.extension = ".mp4"
Exemplo n.º 4
0
    def create_input(
            self,
            project,
            request,
            filename
    ):
        """
        Create and prime self.input for a resumable (dropzone) upload.

        Pulls job/directory/flow information from the request, applies
        upload limits, derives media type from the file extension, and
        opens a resumable upload session for the raw blob.

        On failure (missing flow, unsupported file type) sets
        self.input.status to "failed" with a status_text and returns
        early without opening an upload session.
        """

        self.input = Input.new(
            project=project,
            media_type=None,
            job_id=request.form.get('job_id'),
            directory_id=request.form.get('directory_id'),  # Not trusted
            video_split_duration=request.form.get('video_split_duration')
        )

        self.session.add(self.input)

        self.input = Upload.upload_limits(
            input=self.input,
            file_size=self.dztotalfilesize)

        self.input.original_filename = secure_filename(
            filename)  # http://flask.pocoo.org/docs/0.12/patterns/fileuploads/
        self.input.extension = os.path.splitext(self.input.original_filename)[1].lower()
        self.input.original_filename = os.path.split(self.input.original_filename)[1]

        # At some point should really declare
        # From UI here...
        self.input.type = "from_resumable"
        self.input.dzuuid = self.dzuuid
        self.input.action_flow_id = request.headers.get('flow_id')

        if self.input.action_flow_id:

            if self.input.flow is None:
                self.input.status = "failed"
                self.input.status_text = "No flow found"
                return

        self.input.mode = request.headers.get('mode')

        self.input.media_type = Process_Media.determine_media_type(
            extension=self.input.extension)

        if not self.input.media_type:
            self.input.status = "failed"
            self.input.status_text = "Invalid file type: " + self.input.extension
            # Bail out like the flow-failure path above; previously this
            # fell through and still opened an upload session for an
            # invalid file.
            return

        # self.input.user =

        self.session.flush()  # For ID for path

        self.input.raw_data_blob_path = settings.PROJECT_RAW_IMPORT_BASE_DIR + \
                                        str(self.input.project.id) + "/raw/" + str(self.input.id)

        data_tools.create_resumable_upload_session(
            blob_path=self.input.raw_data_blob_path,
            content_type=None,
            input=self.input
        )
Exemplo n.º 5
0
    def push_frames_for_copy_to_queue(self, source_video_parent_file_id,
                                      destination_video_parent_file_id):
        """
        Given the current data at self.input, get the video frames of the
        existing file and push them to the ProcessMedia queue.
        :return: the list of source video frame files
        """

        source_video_frames = WorkingDirFileLink.image_file_list_from_video(
            session=self.session,
            video_parent_file_id=source_video_parent_file_id,
            order_by_frame=True)

        # Register every frame as pending before any work is queued.
        completion_control = FrameCompletionControl()
        for frame in source_video_frames:
            completion_control.add_pending_frame(frame.frame_number)

        num_frames = len(source_video_frames)

        for frame in source_video_frames:
            # To avoid detached-session issues we must pass only IDs here,
            # never ORM objects.

            # Actually the add remove link thing could be different too...

            # Careful: the file id is the newly copied video.
            # The previous video id should come from the NEW file id,
            # not the previous one.
            frame_input = Input.new(
                parent_input_id=self.input.id,
                sequence_map=self.input.sequence_map,
                file_id=frame.id,  # existing
                video_parent_length=num_frames,
                directory_id=self.input.directory_id,
                source_directory_id=self.input.source_directory_id,
                remove_link=self.input.remove_link,
                add_link=self.input.add_link,
                copy_instance_list=self.input.copy_instance_list,
                # Parent video file where all data is going to be copied.
                parent_file_id=destination_video_parent_file_id,
                project_id=self.input.project_id,
                mode='copy_file',
                type=None,
                media_type='frame',
            )

            queue_item = process_media.PrioritizedItem(
                input=frame_input,
                frame_completion_controller=completion_control,
                total_frames=source_video_frames[num_frames - 1].frame_number,
                num_frames_to_update=num_frames,
                media_type=frame_input.media_type,  # declaring here helps with routing
                priority=100 + frame.frame_number,  # Process in frame priority
                frame_number=frame.frame_number  # Careful, downstream process currently expects it
            )

            process_media.add_item_to_queue(queue_item)
        return source_video_frames
Exemplo n.º 6
0
def input_detail_core(session, project: Project, input_id: int, log: dict):
    """
    Serialize a single Input (with its frame packet) for the given project.

    Returns (serialized_dict, log) on success, or (False, log) with
    log['error'] populated when the input is missing or belongs to a
    different project.

    TODO put as part of Input class
    """

    input = Input.get_by_id(session, id=input_id)
    if input is None:
        # Previously an unknown id raised AttributeError on the
        # project_id access below.
        log['error']['input_id'] = 'Input not found'
        return False, log
    if input.project_id != project.id:
        log['error']['project_id'] = 'Input and project ID mismatch'
        return False, log
    return input.serialize_with_frame_packet(), log
Exemplo n.º 7
0
    def test_s3_add_to_diffgram(self):
        """Fetching an object through the S3 connector should yield an Input."""
        created_input = self.s3conn.fetch_data({
            'action_type': 'fetch_object',
            'path': 'tests3connector/pablo/patrick.png',
            'bucket_name': '1',
            'event_data': {
                'request_user': 1,
                'date_time':
                datetime.datetime.now().strftime('%m/%d/%Y, %H:%M:%S'),
                'connection_id': -1
            }
        })

        # assertIsInstance gives a clearer failure message than comparing
        # type(x) == type(Input()) and is the idiomatic type check.
        self.assertIsInstance(created_input, Input)
Exemplo n.º 8
0
 def test_packet_endpoint_refactor(self):
     """Enqueueing a packet with an image URL should create an Input."""
     packet_data = {
         'media': {
             'url':
             'https://thumbor.forbes.com/thumbor/250x382/https://blogs-images.forbes.com/dorothypomerantz/files/2011/09/Spongebob-squarepants.jpg?width=960',
             'type': 'image'
         }
     }
     created_input = packet.enqueue_packet(
         self.project_string_id,
         session=self.session,
         media_url=packet_data['media']['url'],
         media_type=packet_data['media']['type'],
         job_id=None,
         directory_id=None)
     self.session.commit()
     # assertIsInstance is the idiomatic type check and reports a
     # clearer message than type(x) == type(Input()).
     self.assertIsInstance(created_input, Input)
Exemplo n.º 9
0
def enqueue_packet(project_string_id,
                   session,
                   media_url = None,
                   media_type = None,
                   file_id = None,
                   file_name = None,
                   job_id = None,
                   batch_id = None,
                   directory_id = None,
                   source_directory_id = None,
                   instance_list = None,
                   video_split_duration = None,
                   frame_packet_map = None,
                   remove_link = None,
                   add_link = None,
                   copy_instance_list = None,
                   commit_input = False,
                   task_id = None,
                   video_parent_length = None,
                   type = None,
                   task_action = None,
                   external_map_id = None,
                   original_filename = None,
                   external_map_action = None,
                   enqueue_immediately = False,
                   mode = None,
                   allow_duplicates = False,
                   extract_labels_from_batch = False):
    """
        Creates an Input() object and enqueues it for media processing.

        When PROCESS_MEDIA_ENQUEUE_LOCALLY_IMMEDIATELY is on (or
        enqueue_immediately is True) the input is pushed onto the local
        queue right away; otherwise processing is deferred.

        Returns the Input() object that was created.
    :param packet_data:
    :return:
    """
    diffgram_input = Input()
    project = Project.get(session, project_string_id)
    diffgram_input.file_id = file_id
    diffgram_input.task_id = task_id
    diffgram_input.batch_id = batch_id
    diffgram_input.video_parent_length = video_parent_length
    diffgram_input.remove_link = remove_link
    diffgram_input.add_link = add_link
    diffgram_input.copy_instance_list = copy_instance_list
    diffgram_input.external_map_id = external_map_id
    diffgram_input.original_filename = original_filename
    diffgram_input.external_map_action = external_map_action
    diffgram_input.task_action = task_action
    diffgram_input.mode = mode
    diffgram_input.project = project
    diffgram_input.media_type = media_type
    diffgram_input.type = "from_url"
    diffgram_input.url = media_url
    diffgram_input.video_split_duration = video_split_duration
    diffgram_input.allow_duplicates = allow_duplicates
    if instance_list:
        diffgram_input.instance_list = {}
        diffgram_input.instance_list['list'] = instance_list

    if frame_packet_map:
        diffgram_input.frame_packet_map = frame_packet_map

    session.add(diffgram_input)
    session.flush()  # assigns diffgram_input.id

    if batch_id and extract_labels_from_batch:
        upload_tools = Upload(session = session, project = project, request = None)
        upload_tools.extract_instance_list_from_batch(input = diffgram_input,
                                                      input_batch_id = batch_id,
                                                      file_name = file_name)
    # Expect temp dir to be None here.
    # because each machine should assign it's own temp dir
    # Something else to consider for future here!
    # Once this is part of input, it will be smoothly handled at right time as part of
    # processing queue
    diffgram_input.job_id = job_id

    # Process media handles checking if the directory id is valid
    diffgram_input.directory_id = directory_id
    diffgram_input.source_directory_id = source_directory_id

    # Capture the id while the object is attached to the session.
    diffgram_input_id = diffgram_input.id

    # NOTE: a per-media-type queue_limit used to be computed here but was
    # never used, so it has been removed.

    if settings.PROCESS_MEDIA_ENQUEUE_LOCALLY_IMMEDIATELY is True or enqueue_immediately:

        if commit_input:
            regular_methods.commit_with_rollback(session = session)
        item = PrioritizedItem(
            priority = 10000,  # individual frames have a priority here.
            input_id = diffgram_input_id,
            media_type = media_type)
        add_item_to_queue(item)
    else:
        diffgram_input.processing_deferred = True  # Default

    return diffgram_input
Exemplo n.º 10
0
def task_template_launch_limits(session, task_template, log):
    """
    Validate that a task template may be launched.

    Checks billing (for Market share type), job status, attached
    files/directories, file-count consistency, pending input processing,
    guides, and that the project has at least one label. Any problem is
    recorded in log['error']; the (possibly updated) log is returned.
    """

    # Different permissions depending on conditions ie share type
    # For now don't require billing to be enabled for non market jobs
    #  sending to Market clearly needs billing enabled
    # Future may want to still restrict jobs to paid accounts
    # For now in context of wanting trainer orgs to try it this seems reasonable
    # Potentially a lot to think about here...

    project = task_template.project

    if task_template.share_type == "Market":
        if project.api_billing_enabled is not True:
            log['error'][
                'billing'] = "Please enable billing or select Project / Org for share type. "

    # TODO  Limit count of active jobs? ie default to 3 active jobs?
    # Limit on number of files? ie default to 500 files max per job?

    # Basic info
    # For now this is checked by new job creation
    # so low priorty to double check here
    if task_template.status not in ['draft']:
        log['error']['job_status'] = "Job already launched."

    # Files: refresh the processed-file counter, then require at least
    # one file or attached directory.
    task_template.update_file_count_statistic(session=session)
    attached_dir_list = session.query(JobWorkingDir).filter(
        JobWorkingDir.job_id == task_template.id).all()
    if task_template.file_count_statistic == 0 and len(attached_dir_list) == 0:
        log['error'][
            'attached_dir_list'] = "Must attach at least 1 file or directory"

    # If an explicit file_count was set, the processed count must match it.
    if task_template.file_count:
        if task_template.file_count_statistic != task_template.file_count:
            log['error']['file_count'] = str(task_template.file_count_statistic) + " processed files " + \
                                         "does not match set file_count: " + str(task_template.file_count)

    # note we are querying the input table here
    # suspect this is better then getting all the files
    # and doing a query for each to input
    # ie for getting bulk file status?

    # For retrying we may want to not include "removed" files
    # But a challenge here is that we are querying input not other thing
    # Also not sure if this really handles "failed" ones well...

    result = Input.directory_not_equal_to_status(
        session=session, directory_id=task_template.directory_id)

    # TODO may be some cases that this is overbearing / needs to be handled better
    # ie could call directory_not_equal_to_status with return type
    # of "objects" or something...

    # NOTE(review): debug print left in — consider removing or routing
    # through a logger.
    print(result)

    if result > 0:
        log['error']['file_status'] = "Files processing. " + \
                                      "Try again in 30-60 minutes."

    # Credentials
    # ie Warn if missing ...
    # ie log['warn']['credentials'] = "No credentials required"

    # TODO if job type is exam check if grants at least one credential?

    # Guides

    # NOTE(review): this checks "market" (lowercase) while the billing
    # check above uses "Market" — casing mismatch; one of these branches
    # may never fire. Confirm the intended share_type values.
    if task_template.share_type in ["market"]:
        if task_template.guide_default_id is None:
            log['error']['guide_default'] = "Missing default guide"

    if task_template.type == "Normal":

        if task_template.guide_review_id is None:
            # Default review guide to being same as defualt guide
            # until we can handle this in better way
            task_template.guide_review = task_template.guide_default
            session.add(task_template)

        # Don't log error for now, see above default
        # log['error']['guide_review'] = "Missing review guide"

        # Bid(S)

    # Label check
    label_count = WorkingDirFileLink.file_list(
        session=session,
        working_dir_id=task_template.project.directory_default_id,
        type="label",
        counts_only=True,
    )
    if label_count == 0:
        log['error']['count'] = "Project must have at least 1 label"

    return log
Exemplo n.º 11
0
def input_from_local(session, log, project_string_id, http_input, file,
                     directory_id):
    """
    Create an Input from a locally-uploaded file and process it.

    Writes the uploaded stream to a temp dir, determines the media type
    from the extension, enforces a local file-size limit, and either
    processes the media immediately or enqueues it.

    Returns (success: bool, log, input).
    """
    # TODO review how we want to handle header options
    # Especially if needs to be outside of function for python requests...
    # immediate_mode = request.headers['immediate_mode']
    # Issues to be careful with ie string treamtment of 'True' vs True...
    # NOTE(review): immediate_mode is hardcoded True, so the queued
    # (PrioritizedItem) path at the bottom is effectively dead code
    # unless process_media returns neither True nor False.
    immediate_mode = True

    input = Input()
    input.directory_id = directory_id

    if http_input['instance_list']:
        input.instance_list = {}
        input.instance_list['list'] = http_input['instance_list']

    if http_input['frame_packet_map']:
        input.frame_packet_map = http_input['frame_packet_map']

    # only need to make temp dir if file doesn't already exist...

    original_filename = secure_filename(
        file.filename
    )  # http://flask.pocoo.org/docs/0.12/patterns/fileuploads/

    input.extension = os.path.splitext(original_filename)[1].lower()
    input.original_filename = os.path.split(original_filename)[1]

    input.temp_dir = tempfile.mkdtemp()
    # NOTE(review): original_filename already includes the extension, so
    # appending input.extension again yields e.g. "a.png.png". The path
    # is used consistently below (write + getsize), so it works, but
    # confirm whether the double extension is intended.
    input.temp_dir_path_and_filename = input.temp_dir + \
                                       "/" + original_filename + input.extension

    project = Project.get(session, project_string_id)

    input.project = project

    input.media_type = None
    input.media_type = Process_Media.determine_media_type(input.extension)
    if not input.media_type:
        input.status = "failed"
        input.status_text = "Invalid file type: " + input.extension
        return False, log, input

    session.add(input)
    session.flush()  # assigns input.id for the queue item below

    with open(input.temp_dir_path_and_filename, "wb") as f:

        f.write(file.stream.read())

    # For LOCAL not normal upload
    file_size_limit = 9 * 1024 * 1024 * 1024

    file_size = os.path.getsize(
        input.temp_dir_path_and_filename)  # gets size in bytes

    if file_size > file_size_limit:
        input.status = "failed"
        input.status_text = "Exceeded max file size"
        return False, log, input

    if immediate_mode == True or immediate_mode is None:
        # Leave this as a direct call for time being, as we pass
        # the input back to thing on front end

        process_media = Process_Media(session=session, input=input)

        result = process_media.main_entry()

        # Always return input along with file?

        if result == True:
            return True, log, input

        if result == False:
            return False, log, input

    # Default
    priority = 100

    item = PrioritizedItem(priority=priority,
                           input_id=input.id,
                           media_type=input.media_type)

    add_item_to_queue(item)

    return True, log, input
Exemplo n.º 12
0
    def load(self,
             video_file_name,
             original_filename,
             extension,
             input: Input,
             directory_id=None):
        """

        Convert to .mp4 format if needed
        Upload .mp4 video
        Process each frame

        Arguments
            video_file_name, String, complete file path including directory, filename, and extension
            original_filename, String
            extension, String, includes ".", ie ".mp4"

        Returns
            None on failure (input.status is set to "failed"),
            otherwise the parent video File (input.file)
        """

        try:

            clip = moviepy_editor.VideoFileClip(video_file_name)
            input.status = "loaded_video"
            input.time_loaded_video = datetime.datetime.utcnow()
            input.percent_complete = 20.0
            self.try_to_commit()

        except Exception as exception:
            input.status = "failed"
            input.status_text = "Could not load video. Try again, try a different format or contact us."
            # only for internal use
            # could look at storing in DB later or Using event logging.
            logger.error(
                'Could not load video. Try again, try a different format or contact us. Exception:  {}'
                .format(str(exception)))
            return None

        # https://stackoverflow.com/questions/43966523/getting-oserror-winerror-6-the-handle-is-invalid-in-videofileclip-function
        clip.reader.close()
        # Audio thing here too still doesn't seem to fix it...
        # clip.audio.reader.close_proc()

        # fps handling
        fps = self.project.settings_input_video_fps

        if fps is None:
            fps = 5

        if fps < 0 or fps > 120:
            input.status = "failed"
            # Fix: fps is numeric; concatenating it directly raised TypeError.
            input.status_text = "Invalid fps setting of " + str(fps)
            return None

        original_fps = clip.fps  # Cache, since it will change

        # Always using original. FPS conversion is now deprecated
        fps = original_fps

        clip = clip.set_fps(fps)
        # https://zulko.github.io/moviepy/ref/VideoClip/VideoClip.html#moviepy.video.VideoClip.VideoClip.set_fps
        # Returns a copy of the clip with a new default fps for functions like write_videofile, iterframe, etc.

        # TODO do we want to save original

        # note these statements need to be after here in order to make sure
        # we update fps properly
        # otherwise have fps of say 0 and it's funny
        length = int(
            clip.duration * fps
        )  # Frame count (ESTIMATED) otherwise requires iteration / loop to get exact

        # temp higher limit for testing stuff
        # enough for a 120fps 5 minutes, or 60 fps 10 minutes
        frame_count_limit = 36000

        if length > frame_count_limit:
            input.status = "failed"
            input.status_text = "Frame count of " + str(length) + \
                                " exceeded limit of " + str(frame_count_limit) + " (per video)" + \
                                " Lower FPS conversion in settings, split into seperate files, or upgrade account."
            return None

        max_size = settings.DEFAULT_MAX_SIZE

        if clip.w > max_size or clip.h > max_size:
            clip = resize_video(clip)

        video_file_name = os.path.splitext(
            video_file_name)[0] + "_re_saved.mp4"

        if settings.PROCESS_MEDIA_TRY_BLOCK_ON is True:
            try:
                # See https://zulko.github.io/moviepy/ref/VideoClip/VideoClip.html?highlight=write_videofile#moviepy.video.io.VideoFileClip.VideoFileClip.write_videofile
                # And https://github.com/Zulko/moviepy/issues/645
                #	BUT note it's been renamed to "logger"

                # TODO maybe capture log output somewhere else for debugging?
                # Maybe we could use log to update input status / percent complete
                """
                Feb 9 2020 Audio to True seems to add issues
                ie : index -100001 is out of bounds for axis 0 with size 0 ffmpeg
                found this
                but I don't think that's it
                https://stackoverflow.com/questions/59358680/how-to-fix-out-of-bounds-error-in-to-soundarray-in-moviepy
            
                The strange part is that some of it works...
                TODO IF audio is a common issue, could have 2 try blocks
                but would want to have this as a function then.
                ie video with no audio is perhaps better then total failure, or total no audio.
                """

                clip.write_videofile(video_file_name,
                                     audio=False,
                                     threads=4,
                                     logger=None)
            except Exception as exception:
                input.status = "failed"
                input.status_text = "Could not write video file. Try a different format or contact us."
                logger.error(
                    'Could not write video file. Try a different format or contact us.)'
                )
                return None

        else:
            clip.write_videofile(video_file_name,
                                 audio=False,
                                 threads=4,
                                 logger=None)
        if not directory_id:
            directory_id = self.project.directory_default_id

        # Video file gets created in advance so
        # be careful to add project here
        """
        This is in the context of Video potentially wanting more stuff from the 
        "parent video".
        This needs a lot of work. For the moment we just get the parent input
        and copy a single attribute here for easier access later on.
        Directionally we want to think about stronger connections between
        split clips.
        And forest wise we need to grab this here because going back to get the input
        afterwards from file can be challenging becasue as the system does
        various modifications the parent gets further and further removed.
        """

        parent_video_split_duration = None
        try:
            parent_input = input.parent_input(self.session)
            if parent_input:
                parent_video_split_duration = parent_input.video_split_duration
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are not swallowed; this remains deliberately best-effort.
            print("Could not get parent input")
        video, input.file = Video.new(
            session=self.session,
            project=self.project,
            filename=original_filename,
            frame_rate=clip.fps,
            frame_count=0,
            width=clip.w,
            height=clip.h,
            directory_id=directory_id,
            parent_input_id=input.parent_input_id,
            parent_video_split_duration=parent_video_split_duration,
            file_metadata=input.file_metadata,
        )

        if self.input.frame_packet_map:
            self.__prepare_sequences(parent_input=input)
            if self.check_update_log_errors() is False:
                return

        input.file.input_id = input.id  # revsere link is sometimes handy to have.

        # Jan 13, 2020 these are both computed above
        # Video object is not created yet so stored locally and then used here...
        video.original_fps = original_fps
        video.fps = fps
        video.offset_in_seconds = input.offset_in_seconds

        video.root_blob_path_to_frames = settings.PROJECT_IMAGES_BASE_DIR + \
                                         str(self.project.id) + "/" + str(video.id) + "/frames/"

        self.upload_video_file(video_file_name, ".mp4", video)

        input.status = "finished_writing_video_file"
        input.time_video_write_finished = datetime.datetime.utcnow()
        input.percent_complete = 30.0

        self.try_to_commit()

        self.session.add(video)
        initial_global_frame = 0
        if input.type == 'from_video_split':
            initial_global_frame = video.fps * input.offset_in_seconds
        for index, frame in enumerate(clip.iter_frames()):
            # Fix: previously this assigned the raw numpy frame array;
            # for a non-split video the global frame number is the index.
            global_frame_number = index

            if input.type == 'from_video_split':
                seconds_offset = input.offset_in_seconds
                offset_in_frames = video.fps * seconds_offset
                global_frame_number = index + offset_in_frames

            if index == 0:
                input.status = "pushing_frames_into_processing_queue"

            # This setups up input, see function below
            self.add_frame_to_queue(
                frame,
                index,
                original_filename,
                self.project,
                directory_id,
                video,
                length,
                input.file,  # assumes this is video_parent_file
                global_frame_number,
                initial_global_frame)

            # TODO clarify if this is actually showing up the queue as expected
            video.frame_count += 1

            # This is really key for monitoring efforts
            # Because at the moment this loop can be fairly slow

            if index % 10 == 0:
                # Where 10 is adding this every 10 frames
                # to be completed by next phase
                # at most this adds 1 when compelte so multiple by 30 to represent
                # this portion of the work
                input.percent_complete += (10 / length) * 30
                self.try_to_commit()

        # Clean up handled in process media..

        input.time_pushed_all_frames_to_queue = datetime.datetime.utcnow()

        return input.file
Exemplo n.º 13
0
    def add_frame_to_queue(self,
                           frame,
                           index: int,
                           original_filename: str,
                           project: Project,
                           directory_id,
                           video,
                           length,
                           video_parent_file: File,
                           global_frame_number=None,
                           initial_global_frame=None):
        """
        Where frame is:
            a HxWxN np.array, where N=1 for mask clips and N=3 for RGB clips.
            https://zulko.github.io/moviepy/ref/VideoClip/VideoClip.html


        Careful we don't have self. context here

        Cautions
            * We purposely do not not pass the job id, since we only want to original
            video to be added to the job

        Question,
            is it correct we create input class in part to maintain
            same concepts / format even for video frames?
            Answer: For example see frame_end_number is used to pass information
                Makes more sense to have it all in there then the PrioritizedItem() thing
                long term
                Also thinking in terms of logging
                And yes of course, then it's complete reuse of the component


        Jan 20, 2020
            Note we purposely do NOT commit this as it creates unneeded
            db overhead, so instead we only use it as a local object
            to maintain consistency of design

            which means we do NOT want to add to add it a sesion
            ie self.session.add(input)
        """

        # Local-only Input: used for its attributes, deliberately NOT
        # added to the session (see docstring).
        # TODO use File.new() for consistency here (ie as we add new things)
        frame_input = Input()

        # Single-frame naming and media identity.
        frame_input.original_filename = "{}_{}".format(original_filename, index)
        frame_input.extension = ".jpg"
        frame_input.media_type = "frame"

        frame_input.temp_dir = tempfile.mkdtemp()
        frame_input.project = project
        frame_input.project_id = self.project.id
        frame_input.directory_id = directory_id
        frame_input.parent_file_id = video_parent_file.id
        frame_input.parent_input_id = self.input.id
        frame_input.frame_packet_map = self.input.frame_packet_map

        # Caution: length is estimated. frame_count is calculated as we
        # roll through this so can't use it yet.
        frame_input.video_parent_length = length

        # Temporary, not database persisted. Defined here so existing
        # instances can use it (vs having to get video from db each time;
        # prior we defined this on first frame).
        frame_input.root_blob_path_to_frames = video.root_blob_path_to_frames

        frame_input = self.get_instance_list_from_packet_map(
            input=frame_input,
            frame_number=index,
            global_frame_number=global_frame_number,
            initial_global_frame=initial_global_frame,
            from_video_split=self.input.type == 'from_video_split')

        """
        For frame priority, the original genesis was doing the last frame last
        but, I think it also makese sense to process in order in general.
        An alternative would be to say put a flag on the last frame
        but using order feels like a more general solution, assuming no suprises
        or extra overhead.


        Storing frames

        Maybe don't attach video_parent_file
        because it leads to not bound errors in ORM fairly easily.

        """

        # TODO, consider sending data as a "raw" blob
        # to cloud storage, then setting "processing deferred" to True here.

        # Frames of videos started earlier are processed before new videos.
        queue_item = process_media.PrioritizedItem(
            priority=100 + index,  # Process in frame priority
            input=frame_input,
            raw_numpy_image=frame,
            file_is_numpy_array=True,
            video_id=video.id,
            frame_number=index,
            global_frame_number=global_frame_number,
            media_type=frame_input.media_type)

        process_media.add_item_to_queue(queue_item)