def dumpWithinContext(dirs):
    # Map output file names to their dump functions.
    dumping_functions = [('all_segments.tab', dumpSegments),
                         ('all_canonicals.tab', dumpCanonicals)]
    for name, dump in dumping_functions:
        log(INFO, "Storing %s" % (name))
        filename = os.path.join(dirs[0], name)
        dump(filename)
def extract_all(self, job_path='', results_dir='.'):
    """
    Extract all videos found in job_path and below.

    :param job_path: directory scanned for videos to extract
    :param results_dir: store the results in the given directory
    :return: True on success; on failure, the results_dir content
             should NOT be changed (clean up)
    """
    import shutil
    job_path = os.path.abspath(job_path)
    data = self._compose_job_definition(job_path)
    str_ = json.dumps(data, indent=4, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    with open(os.path.join(results_dir, 'job.json'), 'w') as new_file:
        new_file.write(str_)
    # cineast's mime.types is copied into the current working directory.
    shutil.copy(os.path.join(str(self.extractor_location), 'mime.types'), '.')
    # Rewrite the extractor configuration so output lands in results_dir.
    with open(os.path.join(results_dir, 'cineast.json'), 'w') as new_file:
        with open(os.path.join(self.extractor_location, 'cineast.json'), 'r') as fp:
            data = json.load(fp)
            data["extractor"]["outputLocation"] = results_dir
            str_ = json.dumps(data, indent=4, sort_keys=False,
                              separators=(',', ': '), ensure_ascii=False)
            new_file.write(str_)
    command = ['java', '-Xmx6G', '-Xms6G', '-jar',
               os.path.join(str(self.extractor_location), 'cineast.jar'),
               '--job', os.path.abspath(os.path.join(results_dir, 'job.json')),
               '--config', os.path.abspath(os.path.join(results_dir, 'cineast.json'))]
    p = Popen(command, stderr=PIPE, stdout=PIPE)
    stdout, stderr = p.communicate()
    log(INFO, stdout)
    log(ERROR, stderr)
    if p.returncode == 0:
        # Clean up the temporary job and config files; ignore removal errors.
        try:
            os.remove(os.path.abspath(os.path.join(results_dir, 'job.json')))
            os.remove(os.path.abspath(os.path.join(results_dir, 'cineast.json')))
        except OSError:
            pass
    return p.returncode == 0
def unwrap(path, location=None, check_md5=True):
    """
    Unwrap a wrapped (.ldcc-style) file into location.

    :param path: path of the wrapped file
    :param location: target directory; defaults to the directory containing path
    :param check_md5: verify the unwrapped content against the recorded MD5
    :return: path of the unwrapped file, or None on error
    """
    if location is None:
        location = os.path.split(os.path.abspath(path))[0]
    # The unwrapped file drops the wrapper extension (e.g. clip.mp4.ldcc -> clip.mp4).
    newname = os.path.join(location,
                           os.path.splitext(os.path.basename(path))[0])
    if os.path.exists(newname):
        return newname
    if not os.path.exists(location):
        mkdirs(location)
    with open(os.path.normpath(path), 'rb') as fp:
        scanner = Scanner(os.path.normpath(path), fp)
        if scanner.readHeader():
            newname = scanner.writeContents(location)
            if check_md5:
                md5 = scanner.md5(newname)
                if scanner.data['data_md5'] != md5:
                    log(ERROR, 'corrupt {}'.format(path))
                    os.remove(newname)
                    return None
    return newname
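# A minimal usage sketch for unwrap, with hypothetical paths (Scanner, mkdirs
# and log are assumed to be module-level imports, as elsewhere in this file):
#
#   unwrapped = unwrap('/data/ldc/clip.mp4.ldcc', location='/tmp/unwrapped')
#   if unwrapped is None:
#       log(ERROR, 'unwrap failed: wrapped content was corrupt')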
def loadObjects(object_type, filename):
    from aida.media.object import MediaObject, MediaObjectIdentifier
    loader = MediaObjectIdentifier()
    data = loader.load(filename, object_type)
    # Pre-fetch existing rows so objects and matches already in the
    # database are skipped on reload.
    matches = {}
    preloads = {}
    premappings = set()
    for x in db.session.query(Segment.frame_file_name, Segment.id).all():
        matches[x.frame_file_name] = x.id
    for x in db.session.query(SegmentObject.segment_id,
                              SegmentObject.bounding_box,
                              SegmentObject.id).all():
        preloads[x.segment_id + x.bounding_box] = x.id
    for x in db.session.query(SegmentObjectMatch.to_obj,
                              SegmentObjectMatch.from_obj).all():
        premappings.add(x.to_obj + x.from_obj)
    mappings = {}
    for v in data.values():
        # Resolve the object's file against known segment frame files, trying
        # progressively shorter path suffixes (three components down to one).
        item = None
        levels = 3
        while levels > 0 and item is None:
            search_id = v.id.split(os.path.sep)[-levels:]
            key = os.path.normpath(os.path.sep.join(search_id))
            if key in matches:
                item = matches[key]
            levels -= 1
        if item is None:
            log(ERROR, "Cannot find file {} ".format(v.id))
            continue
        if item + v.text_bounding_box() in preloads:
            mappings[v.id] = preloads[item + v.text_bounding_box()]
            continue
        log(INFO, "File {} ".format(v.id))
        new_id = uuid.uuid4().urn
        db.session.add(SegmentObject(id=new_id,
                                     bounding_box=v.text_bounding_box(),
                                     object_type=v.object_type,
                                     segment_id=item))
        mappings[v.id] = new_id
    db.session.commit()
    # Add relationship rows, committing in batches of 1000.
    commit_time = 1000
    for v in data.values():
        for rel in v.relationships:
            # Skip relationships whose endpoints could not be resolved above.
            if rel[0] not in mappings or v.id not in mappings:
                continue
            if (mappings[rel[0]] + mappings[v.id]) in premappings:
                continue
            db.session.add(SegmentObjectMatch(id=uuid.uuid4().urn,
                                              to_obj=mappings[rel[0]],
                                              from_obj=mappings[v.id],
                                              measure=rel[1]))
            commit_time -= 1
            if commit_time == 0:
                commit_time = 1000
                db.session.commit()
    db.session.commit()
def _load_provider(self, app, provider):
    log(INFO, "Loading %s" % (provider.__class__.__name__))
    for k, v in provider.config(external_config=self.external_config).items():
        app.config[k] = v
    # Recursively load any providers this one depends on that are not
    # already registered.
    for dependency in provider.dependencies():
        if self.get_provider_by_name(dependency.__name__) is None:
            self.providers.append(dependency())
            self._load_provider(app, self.providers[-1])
def unZip(dir, item, destination):
    """Extract each file in the archive, yielding the extracted paths."""
    with ZipFile(os.path.join(dir, item), 'r') as inzip:
        for info in inzip.infolist():
            # Skip directory entries.
            if info.filename[-1] == '/':
                continue
            # Flatten wrapped (ldcc) files to their base name.
            if info.filename.endswith('ldcc'):
                info.filename = os.path.basename(info.filename)
            log(INFO, 'extracting: ' + str(info.filename))
            inzip.extract(info, destination)
            yield os.path.join(destination, info.filename)
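# unZip is a generator, so extraction happens lazily as it is consumed; a
# minimal usage sketch with hypothetical paths:
#
#   for extracted in unZip('/data/archives', 'video_batch.zip', '/tmp/out'):
#       log(INFO, 'got %s' % extracted)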
def findFFMPEG():
    try:
        # Probe for ffmpeg on the PATH.
        p = Popen('ffmpeg', stderr=PIPE)
        p.communicate()
        return 'ffmpeg'
    except FileNotFoundError:
        # Fall back to an explicitly configured binary location.
        try:
            return os.environ['FFMPEG_BIN']
        except KeyError:
            log(ERROR,
                'Cannot find ffmpeg. Environment variable FFMPEG_BIN not set')
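# getReferencedFramesGivenSegments below reads a module-level ffmpeg_command.
# One plausible wiring, an assumption not shown in this file, is to resolve
# it once at import time:
#
#   ffmpeg_command = findFFMPEG()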
def extract(self, dirs, destination):
    extractor = KeyFrameExtractor(self.cineast)
    for dir in dirs:
        ldc.unZipAll(dir, filters=['mp4'], location=destination,
                     unwrap_files=True, cleanup=True)
    object_json = os.path.join(destination, 'json',
                               'cineast_multimediaobject.json')
    segment_json = os.path.join(destination, 'json', 'cineast_segment.json')
    representative_json = os.path.join(destination, 'json',
                                       'cineast_representativeframes.json')
    # Run the extractor only if its outputs are not already present.
    if not os.path.exists(object_json) or not os.path.exists(segment_json):
        extractor.extract_all(job_path=destination, results_dir=destination)
    if not os.path.exists(object_json) or not os.path.exists(segment_json) \
            or not os.path.exists(representative_json):
        log(ERROR, 'key frame extraction failed')
def default(self, obj):
    def _prim_check(obj):
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()
        elif isinstance(obj, InstrumentedList):
            return [self.default(item) for item in obj]
        else:
            return obj

    def _complex_check(obj):
        if isinstance(obj.__class__, DeclarativeMeta):
            # An SQLAlchemy model: serialize its public, exportable fields.
            fields = {}
            valid_values = obj.__json__() if hasattr(obj, '__json__') else None
            for field in [x for x in dir(obj)
                          if not x.startswith('_') and x != 'metadata' and
                          (valid_values is None or x in valid_values)]:
                data = obj.__getattribute__(field)
                # Do not serialize lazy relationship queries.
                if isinstance(data, BaseQuery):
                    continue
                fields[field] = _complex_check(data)
            return fields
        if isinstance(obj, InstrumentedList):
            return [_complex_check(item) for item in obj]
        return _prim_check(obj)

    try:
        return _complex_check(obj)
    except TypeError as e:
        log(ERROR, str(e))
        return None
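# default() follows the json.JSONEncoder extension hook. A sketch of the
# presumed enclosing class and its use; the class name AlchemyEncoder is
# hypothetical, only the default() method above comes from this file:
#
#   class AlchemyEncoder(json.JSONEncoder):
#       def default(self, obj):
#           ...  # body as above
#
#   payload = json.dumps(model_instance, cls=AlchemyEncoder)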
def getReferencedFramesGivenSegments(filename,
                                     segments=None,
                                     chunk_size=20,
                                     alternate_directory=None):
    """
    :param filename: path of the video file
    :param segments: VideoSegment list to pull representative frames for
    :param chunk_size: number of frames extracted per ffmpeg invocation
    :param alternate_directory: write frames here instead of next to the video
    :return: generator yielding each VideoSegment with its sample image attached
    @rtype: generator (VideoSegment)
    """
    segments = segments if segments is not None else []
    # Process segments in chunks to keep the ffmpeg filter string short.
    chunks = [segments[i:i + chunk_size]
              for i in range(0, len(segments), chunk_size)]
    frames_done = 0
    for chunk in chunks:
        # Build an ffmpeg select expression matching each representative frame.
        expression = [r'eq(n\,%d)' % (segment.representative_frame_number)
                      for segment in chunk]
        base = os.path.splitext(filename)[0]
        if alternate_directory is not None:
            base = os.path.join(alternate_directory, os.path.basename(base))
        expression_str = '+'.join(expression)
        if ffmpeg_command is None or len(ffmpeg_command) == 0:
            log(ERROR, 'FFMPEG COMMAND NOT FOUND')
        command = [ffmpeg_command, '-i', filename,
                   '-vf', 'select=' + expression_str,
                   '-vsync', '0',
                   '-start_number', str(frames_done),
                   '{}_%06d.png'.format(base)]
        log(INFO, ' '.join(command))
        p = Popen(command, stderr=PIPE)
        _, stderr = p.communicate()
        if p.returncode == 0:
            for i in range(len(chunk)):
                segment = segments[frames_done]
                segment_file = '%s%06d.png' % (base + '_', frames_done)
                segment.set_sample_img(cv2.imread(segment_file), segment_file)
                frames_done += 1
                yield segment
        else:
            log(ERROR, stderr)
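# Usage sketch: because the function is a generator, ffmpeg runs chunk by
# chunk as segments are consumed (video path and consumer are hypothetical):
#
#   for segment in getReferencedFramesGivenSegments(
#           '/videos/clip.mp4', segments=my_segments,
#           alternate_directory='/tmp/frames'):
#       handle(segment)  # hypothetical consumer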
def loadWithinContext(dirs):
    import re
    from functools import partial
    # Map file-name patterns to loading functions, ordered by load order.
    # None indicates a loader reserved for future use.
    loading_function_mappings = [
        (r"hypothesis_info\.tab", loadHypothesis),
        (r"media_list\.tab", loadMedia),
        (r"parent_children\.tab", loadParentChildDetails),
        (r"twitter_info\.tab", None),
        (r"uid_info\.tab", None),
        (r"uids_missing_video\.tab", None),
        (r"T.*KB\.tab", loadKB),
        (r"T.*ent_mentions\.tab", loadKBEntities),  # yes, same file
        (r"T.*ent_mentions\.tab", loadMediaEntities),  # yes, same file
        (r"T.*ent_mentions\.tab", loadEntities),  # yes, same file
        (r"T.*ent_mentions\.tab", loadEntityMentions),
        (r"T.*evt_mentions\.tab", None),
        (r"T.*evt_slots\.tab", None),
        (r"T.*rel_mentions\.tab", None),
        (r"T.*rel_slots\.tab", None),
        (r"T.*hypothesis\.tab", None),
        (r"rel_evt_slot_mapping_table\.tab", None),
        (r"doc_lang\.tab", None),
        (r".*canonicals\.tab", loadCanonicals),
        (r".*segments\.tab", loadSegments),
        (r".*_multimediaobject\.json", loadKeyFrames),
        (r"image\.json", loadImages),
        (r"ocr\.txt", partial(loadOCR, 'inf')),
        (r"object_faces.*\.json", partial(loadObjects, 'fac'))
    ]
    loading_function_mappings = [(re.compile(pattern), loader)
                                 for pattern, loader in loading_function_mappings]
    # Find the files, grouped by the pattern they match.
    files_by_match = [[] for pos in range(len(loading_function_mappings))]
    count = 0
    for dir in dirs:
        log(INFO, "Scanning directory %s" % (dir))
        for root, walked_dirs, files in os.walk(dir):
            for name in files:
                for pos in range(len(loading_function_mappings)):
                    if loading_function_mappings[pos][1] is None:
                        continue
                    if loading_function_mappings[pos][0].match(name) is not None:
                        count += 1
                        files_by_match[pos].append(os.path.join(root, name))
    log(INFO, "Loading %d files" % (count))
    for pos in range(len(loading_function_mappings)):
        mapping = loading_function_mappings[pos]
        matched_files = files_by_match[pos]
        # Have a loading function?
        if mapping[1] is not None:
            # Load each file matching the mapping's pattern.
            for matched_file in matched_files:
                log(INFO, "Loading %s" % (matched_file))
                try:
                    mapping[1](matched_file)
                except Exception as e:
                    log(ERROR, "Failed to load %s: %s" % (matched_file, str(e)))
                    exc_type, exc_value, exc_traceback = exc_info()
                    print_tb(exc_traceback, limit=1, file=stdout)
                    raise e
    db.session.commit()
    log(INFO, "Loading complete")
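# Usage sketch with hypothetical directories: loadWithinContext scans each
# directory tree, dispatches matching files to their loaders in the order
# listed above, and commits once at the end:
#
#   loadWithinContext(['/data/aida/ldc_corpus', '/data/aida/extracted'])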
def __init__(self, languages=None):
    # Default to English; avoid a mutable default argument.
    self.languages = languages if languages is not None else ['eng']
    log(INFO, 'configured OCR with languages %s' % ', '.join(self.languages))