Example #1
def dumpWithinContext(dirs):
    # (output filename, dump function) pairs, written in this order
    dumping_functions = [('all_segments.tab', dumpSegments),
                         ('all_canonicals.tab', dumpCanonicals)]
    for filename, dump in dumping_functions:
        log(INFO, "Storing %s" % filename)
        dump(os.path.join(dirs[0], filename))
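
A minimal, runnable sketch of the same dispatch-table idea; dump_segments and dump_canonicals are hypothetical stand-ins for the real dump functions:

import os
import tempfile

def dump_segments(filename):
    # hypothetical stand-in for dumpSegments
    with open(filename, 'w') as f:
        f.write('segments\n')

def dump_canonicals(filename):
    # hypothetical stand-in for dumpCanonicals
    with open(filename, 'w') as f:
        f.write('canonicals\n')

def dump_all(out_dir):
    # (output filename, dump function) pairs, processed in order
    dumpers = [('all_segments.tab', dump_segments),
               ('all_canonicals.tab', dump_canonicals)]
    for name, dump in dumpers:
        dump(os.path.join(out_dir, name))

dump_all(tempfile.mkdtemp())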
Example #2
    def extract_all(self, job_path='', results_dir='.'):
        """
        Extract all videos found in the given directory and below.

        :param job_path: path of the directory containing the videos to extract
        :param results_dir: store the results in the given directory
        :return: True on success; on failure, the results_dir content should NOT be changed (clean up)
        """
        import shutil
        job_path = os.path.abspath(job_path)
        data = self._compose_job_definition(job_path)
        str_ = json.dumps(data, indent=4, sort_keys=False, separators=(',', ': '), ensure_ascii=False)
        with open(os.path.join(results_dir, 'job.json'), 'w') as new_file:
            new_file.write(str_)
        shutil.copy(os.path.join(str(self.extractor_location), 'mime.types'), '.')
        with open(os.path.join(results_dir, 'cineast.json'), 'w') as new_file:
            with open(os.path.join(self.extractor_location, 'cineast.json'), 'r') as fp:
                data = json.load(fp)
                data["extractor"]["outputLocation"] = results_dir
                str_ = json.dumps(data, indent=4, sort_keys=False, separators=(',', ': '), ensure_ascii=False)
                new_file.write(str_)
        command = ['java', '-Xmx6G', '-Xms6G', '-jar',
                   os.path.join(str(self.extractor_location), 'cineast.jar'),
                   '--job', os.path.abspath(os.path.join(results_dir, 'job.json')),
                   '--config', os.path.abspath(os.path.join(results_dir, 'cineast.json'))]
        p = Popen(command, stderr=PIPE, stdout=PIPE)
        stdout, stderr = p.communicate()
        log(INFO, stdout)
        log(ERROR, stderr)
        if p.returncode == 0:
            try:
                # best-effort cleanup of the temporary config files
                os.remove(os.path.abspath(os.path.join(results_dir, 'job.json')))
                os.remove(os.path.abspath(os.path.join(results_dir, 'cineast.json')))
            except OSError:
                pass
        return p.returncode == 0
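
A self-contained sketch of the write-config-then-run-subprocess pattern used above; the command here is a placeholder (Python printing a line) rather than the real cineast.jar invocation:

import json
import os
import sys
from subprocess import Popen, PIPE

def run_job(results_dir='.'):
    # write the job configuration next to the results
    config = {'extractor': {'outputLocation': results_dir}}
    config_path = os.path.join(results_dir, 'job.json')
    with open(config_path, 'w') as f:
        json.dump(config, f, indent=4)
    # stand-in command; a real caller would launch the extractor jar here
    p = Popen([sys.executable, '-c', 'print("extracting")'],
              stdout=PIPE, stderr=PIPE)
    stdout, stderr = p.communicate()
    if p.returncode == 0:
        os.remove(config_path)  # clean up the temporary config on success
    return p.returncode == 0

print(run_job())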
Example #3
def unwrap(path, location=None, check_md5=True):
    """
    Unwrap the file at path, writing its contents into location.

    :param path: path of the wrapped file
    :param location: destination directory; defaults to the directory of path
    :param check_md5: verify the embedded md5 of the unwrapped contents
    :return: the unwrapped file name, or None if error
    """
    if location is None:
        location = os.path.split(os.path.abspath(path))[0]
    newname = os.path.join(location, os.path.splitext(os.path.basename(path))[0])
    if os.path.exists(newname):
        return newname
    if not os.path.exists(location):
        mkdirs(location)
    with open(os.path.normpath(path), 'rb') as fp:
        scanner = Scanner(os.path.normpath(path), fp)
        if not scanner.readHeader():
            # header could not be read: treat as an error
            return None
        newname = scanner.writeContents(location)
        if check_md5:
            md5 = scanner.md5(newname)
            if scanner.data['data_md5'] != md5:
                log(ERROR, 'corrupt {}'.format(path))
                os.remove(newname)
                return None
    return newname
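
The md5 comparison above goes through the Scanner's helpers; a generic, self-contained version of the same integrity check (hashlib only) might look like this:

import hashlib

def file_md5(path, block_size=65536):
    # hash the file in blocks so large files do not load into memory at once
    digest = hashlib.md5()
    with open(path, 'rb') as fp:
        for block in iter(lambda: fp.read(block_size), b''):
            digest.update(block)
    return digest.hexdigest()

def verify(path, expected_md5):
    # True when the file contents match the recorded digest
    return file_md5(path) == expected_md5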
Example #4
def loadObjects(object_type, filename):
    from aida.media.object import MediaObject, MediaObjectIdentifier
    loader = MediaObjectIdentifier()
    data = loader.load(filename, object_type)
    matches = {}
    preloads = {}
    premappings = set()
    for x in db.session.query(Segment.frame_file_name, Segment.id).all():
        matches[x.frame_file_name] = x.id
    for x in db.session.query(SegmentObject.segment_id,
                              SegmentObject.bounding_box,
                              SegmentObject.id).all():
        preloads[x.segment_id + x.bounding_box] = x.id
    for x in db.session.query(SegmentObjectMatch.to_obj,
                              SegmentObjectMatch.from_obj).all():
        premappings.add(x.to_obj + x.from_obj)
    mappings = {}
    for v in data.values():
        item = None
        levels = 3
        while levels > 0 and item is None:
            search_id = v.id.split(os.path.sep)[-levels:]
            key = os.path.sep.join(search_id)
            key = os.path.normpath(key)
            if key in matches:
                item = matches[key]
            levels -= 1
        if item is None:
            log(ERROR, "Cannot find file {} ".format(v.id))
        elif item + v.text_bounding_box() in preloads:
            mappings[v.id] = preloads[item + v.text_bounding_box()]
            continue
        else:
            log(INFO, "File {} ".format(v.id))
            new_id = uuid.uuid4().urn
            db.session.add(
                SegmentObject(id=new_id,
                              bounding_box=v.text_bounding_box(),
                              object_type=v.object_type,
                              segment_id=item))
            mappings[v.id] = new_id
    db.session.commit()
    commit_time = 1000  # countdown: commit in batches of 1000 rows
    for v in data.values():
        for rel in v.relationships:
            new_id = uuid.uuid4().urn
            #print("%s %s" % (mappings[rel[0]], mappings[v.id]))
            if (mappings[rel[0]] + mappings[v.id]) in premappings:
                continue
            db.session.add(
                SegmentObjectMatch(id=new_id,
                                   to_obj=mappings[rel[0]],
                                   from_obj=mappings[v.id],
                                   measure=rel[1]))
            commit_time -= 1
            if commit_time == 0:
                commit_time = 1000
                db.session.commit()
    db.session.commit()
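
The commit_time countdown implements periodic commits; a standalone sketch of that batching idiom, with a hypothetical StubSession in place of db.session:

class StubSession:
    # hypothetical stand-in for db.session; counts commits
    def __init__(self):
        self.pending, self.commits = 0, 0
    def add(self, obj):
        self.pending += 1
    def commit(self):
        self.pending, self.commits = 0, self.commits + 1

session = StubSession()
BATCH = 1000
remaining = BATCH
for row in range(2500):
    session.add(row)
    remaining -= 1
    if remaining == 0:
        remaining = BATCH
        session.commit()  # flush every BATCH rows to bound the session size
session.commit()          # final commit for the tail of the last batch
print(session.commits)    # 3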
Example #5
    def _load_provider(self, app, provider):
        log(INFO, "Loading %s" % (provider.__class__.__name__))
        # push the provider's configuration into the app
        for k, v in provider.config(
                external_config=self.external_config).items():
            app.config[k] = v
        # recursively load any dependencies that are not registered yet
        for dependency in provider.dependencies():
            if self.get_provider_by_name(dependency.__name__) is None:
                self.providers.append(dependency())
                self._load_provider(app, self.providers[-1])
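
A self-contained sketch of the recursive dependency loading, with hypothetical Database and Cache providers and without the Flask app config step:

class Registry:
    def __init__(self):
        self.providers = []
    def get_provider_by_name(self, name):
        return next((p for p in self.providers
                     if p.__class__.__name__ == name), None)
    def load(self, provider):
        # load a provider, then recursively load any missing dependencies
        for dependency in provider.dependencies():
            if self.get_provider_by_name(dependency.__name__) is None:
                self.providers.append(dependency())
                self.load(self.providers[-1])

class Database:
    def dependencies(self):
        return []

class Cache:
    def dependencies(self):
        return [Database]

r = Registry()
r.providers.append(Cache())
r.load(r.providers[0])
print([p.__class__.__name__ for p in r.providers])  # ['Cache', 'Database']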
Example #6
def unZip(dir, item, destination):
    with ZipFile(os.path.join(dir, item), 'r') as inzip:
        for info in inzip.infolist():
            # skip directory entries
            if info.filename.endswith('/'):
                continue
            if info.filename.endswith('ldcc'):
                # flatten the archive path before extracting
                info.filename = os.path.basename(info.filename)
                log(INFO, 'extracting: ' + str(info.filename))
                inzip.extract(info, destination)
                yield os.path.join(destination, info.filename)
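
A runnable sketch of the flatten-and-extract generator; it builds a tiny zip in a temp directory so it can be executed standalone:

import os
import tempfile
from zipfile import ZipFile

def extract_matching(zip_path, suffix, destination):
    with ZipFile(zip_path, 'r') as inzip:
        for info in inzip.infolist():
            if info.filename.endswith('/'):
                continue  # skip directory entries
            if info.filename.endswith(suffix):
                info.filename = os.path.basename(info.filename)  # flatten paths
                inzip.extract(info, destination)
                yield os.path.join(destination, info.filename)

tmp = tempfile.mkdtemp()
zpath = os.path.join(tmp, 'sample.zip')
with ZipFile(zpath, 'w') as z:
    z.writestr('nested/dir/a.ldcc', b'payload')
print(list(extract_matching(zpath, 'ldcc', tmp)))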
Example #7
def findFFMPEG():
    try:
        p = Popen('ffmpeg', stderr=PIPE)
        p.communicate()
        return 'ffmpeg'
    except FileNotFoundError:
        try:
            return os.environ['FFMPEG_BIN']
        except KeyError:
            log(
                ERROR,
                'Cannot find ffmpeg.  Environment variable FFMPEG_BIN not set')
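
On Python 3.3+, shutil.which can probe for the binary without spawning it; an alternative sketch (not the original implementation):

import os
import shutil

def find_ffmpeg():
    # prefer the binary on PATH, then fall back to the FFMPEG_BIN variable
    found = shutil.which('ffmpeg')
    if found is not None:
        return found
    return os.environ.get('FFMPEG_BIN')  # None if neither is available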
Example #8
    def extract(self, dirs, destination):
        extractor = KeyFrameExtractor(self.cineast)
        for dir in dirs:
            ldc.unZipAll(dir,
                         filters=['mp4'],
                         location=destination,
                         unwrap_files=True,
                         cleanup=True)
        object_json = os.path.join(destination, 'json',
                                   'cineast_multimediaobject.json')
        segment_json = os.path.join(destination, 'json',
                                    'cineast_segment.json')
        representative_json = os.path.join(
            destination, 'json', 'cineast_representativeframes.json')
        if not os.path.exists(object_json) or not os.path.exists(segment_json):
            extractor.extract_all(job_path=destination,
                                  results_dir=destination)
        if not os.path.exists(object_json) or not os.path.exists(
                segment_json) or not os.path.exists(representative_json):
            log(ERROR, 'key frame extraction failed')
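
The run-only-if-outputs-missing guard generalizes to any list of expected artifacts; a small illustrative sketch (paths are made up):

import os

def outputs_missing(paths):
    # returns the subset of expected output files that do not exist yet
    return [p for p in paths if not os.path.exists(p)]

expected = ['json/cineast_multimediaobject.json',
            'json/cineast_segment.json',
            'json/cineast_representativeframes.json']
missing = outputs_missing(expected)
if missing:
    print('would rerun extraction; missing: %s' % ', '.join(missing))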
Example #9
    def default(self, obj):
        def _prim_check(obj):
            if isinstance(obj, (datetime, date)):
                return obj.isoformat()
            elif isinstance(obj, InstrumentedList):
                return [self.default(item) for item in obj]
            else:
                return obj

        def _complex_check(obj):
            if isinstance(obj.__class__, DeclarativeMeta):
                # an SQLAlchemy model: serialize its public fields
                fields = {}
                valid_values = obj.__json__() if hasattr(obj, '__json__') else None
                for field in [x for x in dir(obj)
                              if not x.startswith('_') and x != 'metadata' and
                              (valid_values is None or x in valid_values)]:
                    data = getattr(obj, field)
                    if isinstance(data, BaseQuery):
                        continue
                    fields[field] = _complex_check(data)
                return fields

            if isinstance(obj, InstrumentedList):
                return [_complex_check(item) for item in obj]

            return _prim_check(obj)

        try:
            return _complex_check(obj)
        except TypeError as e:
            log(ERROR, str(e))
            return None
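
Stripped of the SQLAlchemy specifics, the pattern is a json.JSONEncoder subclass whose default falls back to the base class; a minimal runnable sketch for datetimes:

import json
from datetime import datetime, date

class ISODateEncoder(json.JSONEncoder):
    def default(self, obj):
        # serialize dates as ISO-8601 strings; defer everything else
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        return super().default(obj)

print(json.dumps({'when': datetime(2020, 1, 2, 3, 4)}, cls=ISODateEncoder))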
Example #10
def getReferencedFramesGivenSegments(filename,
                                     segments=None,
                                     chunk_size=20,
                                     alternate_directory=None):
    """
    Extract the representative frame of each segment from the video file.

    :param filename: path of the video file
    :param segments: segments whose representative frames should be extracted
    :param chunk_size: number of frames to request per ffmpeg invocation
    :param alternate_directory: write the frame images here instead of next to filename
    :return: generator over the segments, each with its sample image attached
    @rtype: list (VideoSegment)
    """
    segments = [] if segments is None else segments
    chunks = [
        segments[i:i + chunk_size] for i in range(0, len(segments), chunk_size)
    ]
    frames_done = 0
    for chunk in chunks:
        expression = [
            r'eq(n\,%d)' % (segment.representative_frame_number)
            for segment in chunk
        ]
        base = os.path.splitext(filename)[0]
        if alternate_directory is not None:
            base = os.path.join(alternate_directory, os.path.basename(base))
        expression_str = '+'.join(expression)
        if ffmpeg_command is None or len(ffmpeg_command) == 0:
            log(ERROR, 'FFMPEG COMMAND NOT FOUND')
            return
        command = [
            ffmpeg_command, '-i', filename, '-vf', 'select=' + expression_str,
            '-vsync', '0', '-start_number',
            str(frames_done), '{}_%06d.png'.format(base)
        ]
        log(INFO, ' '.join(command))
        p = Popen(command, stderr=PIPE)
        _, stderr = p.communicate()
        if p.returncode == 0:
            for i in range(len(chunk)):
                segment = segments[frames_done]
                segment_file = '%s%06d.png' % (base + '_', frames_done)
                segment.set_sample_img(cv2.imread(segment_file), segment_file)
                frames_done += 1
                yield segment
        else:
            log(ERROR, stderr)
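
The chunking and select-filter construction can be exercised without ffmpeg; a standalone sketch (frame numbers are made up):

def build_select_commands(frame_numbers, chunk_size=20):
    # split the frame list into chunks to keep each ffmpeg filter short
    chunks = [frame_numbers[i:i + chunk_size]
              for i in range(0, len(frame_numbers), chunk_size)]
    for chunk in chunks:
        # one select expression per chunk, e.g. select=eq(n\,3)+eq(n\,7)
        yield 'select=' + '+'.join(r'eq(n\,%d)' % n for n in chunk)

for expr in build_select_commands([3, 7, 42], chunk_size=2):
    print(expr)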
Example #11
def loadWithinContext(dirs):
    import re
    from functools import partial
    # map file-name patterns to loading functions,
    # ordered by the order in which they must be loaded.
    # None marks a recognized file that is not loaded yet (future use).
    loading_function_mappings = [
        ("hypothesis_info\.tab", loadHypothesis),
        ("media_list.tab", loadMedia),
        ("parent_children\.tab", loadParentChildDetails),
        ("twitter_info\.tab", None),
        ("uid_info\.tab", None),
        ("uids_missing_video\.tab", None),
        ("T.*KB\.tab", loadKB),
        ("T.*ent_mentions\.tab", loadKBEntities),  # yes, same file
        ("T.*ent_mentions\.tab", loadMediaEntities),  # yes, same file
        ("T.*ent_mentions\.tab", loadEntities),  # yes, same file
        ("T.*ent_mentions\.tab", loadEntityMentions),
        ("T.*evt_mentions\.tab", None),
        ("T.*evt_slots\.tab", None),
        ("T.*rel_mentions\.tab", None),
        ("T.*rel_slots\.tab", None),
        ("T.*hypothesis\.tab", None),
        ("rel_evt_slot_mapping_table\.tab", None),
        ("doc_lang\.tab", None),
        (".*canonicals\.tab", loadCanonicals),
        (".*segments\.tab", loadSegments),
        (".*_multimediaobject\.json", loadKeyFrames),
        ("image.json", loadImages),
        ("ocr.txt", partial(loadOCR, 'inf')),
        ("object_faces.*\.json", partial(loadObjects, 'fac'))
    ]
    loading_function_mappings = [(re.compile(mapping[0]), mapping[1])
                                 for mapping in loading_function_mappings]

    # find the files; organize by name
    files_by_match = [[] for _ in loading_function_mappings]
    count = 0
    for dir in dirs:
        log(INFO, "Scanning directory %s" % (dir))
        for root, walked_dirs, files in os.walk(dir):
            for name in files:
                for pos, (pattern, loader) in enumerate(loading_function_mappings):
                    if loader is None:
                        continue
                    if pattern.match(name) is not None:
                        count += 1
                        files_by_match[pos].append(os.path.join(root, name))

    log(INFO, "Loading %d files" % (count))
    for (pattern, loader), matched_files in zip(loading_function_mappings,
                                                files_by_match):
        # have a loading function?
        if loader is not None:
            # load each file that matched the mapping's pattern
            for matched_file in matched_files:
                log(INFO, "Loading %s" % (matched_file))
                try:
                    loader(matched_file)
                except Exception as e:
                    log(ERROR,
                        "Failed to load %s: %s" % (matched_file, str(e)))
                    exc_type, exc_value, exc_traceback = exc_info()
                    print_tb(exc_traceback, limit=1, file=stdout)
                    raise e
    db.session.commit()
    log(INFO, "Loading complete")
Example #12
    def __init__(self, languages=None):
        # use a None sentinel to avoid a shared mutable default list
        self.languages = languages if languages is not None else ['eng']
        log(INFO, 'configured OCR with languages %s' % ', '.join(self.languages))
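
For reference, a short sketch of why the None-sentinel default used above is preferred over a mutable ['eng'] default:

def bad(langs=['eng']):
    langs.append('rus')     # mutates the single shared default list
    return langs

def good(langs=None):
    langs = list(langs) if langs is not None else ['eng']
    langs.append('rus')     # safe: a fresh list every call
    return langs

print(bad())   # ['eng', 'rus']
print(bad())   # ['eng', 'rus', 'rus'] -- state leaked across calls
print(good())  # ['eng', 'rus']
print(good())  # ['eng', 'rus'] -- independent each call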