def main():
    from dbimpl import DBImpl

    model = keras.models.load_model('weights.h5')
    print 'finished loading model'
    print video_dir, images_dir

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        # if list_id in ['PLS1QulWo1RIbfTjQvTdj8Y6yyq4R7g-Al', 'PLFE2CE09D83EE3E28', 'PLE7E8B7F4856C9B19', 'PL27BCE863B6A864E3']:
        #     continue
        print list_id

        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            # video_path = os.path.join(list_folder, video_title + "_" + video_hash + ".mp4")
            video = video_title + "_" + video_hash
            print video
            predict_video(video, model)

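# --- Hedged sketch: the dbimpl module itself is not included in this
# snapshot. Judging by how DBImpl is used throughout (a {'url': ...} config
# pointing at a sqlite file, '?' placeholders, and querymany / queryone /
# updateone / close methods), a minimal sqlite3-backed stand-in could look
# like the following. This is an assumption for illustration, not the
# project's actual implementation.
import sqlite3

class DBImpl(object):
    def __init__(self, config):
        # config example: {'url': '/path/to/videos.db'}; the mysql variant
        # seen later would need a different driver and '%s' placeholders
        self.conn = sqlite3.connect(config['url'])

    def querymany(self, sql, *params):
        cur = self.conn.cursor()
        cur.execute(sql, params)
        return cur.fetchall()

    def queryone(self, sql, *params):
        cur = self.conn.cursor()
        cur.execute(sql, params)
        return cur.fetchone()

    def updateone(self, sql, *params):
        cur = self.conn.cursor()
        cur.execute(sql, params)
        self.conn.commit()

    def close(self):
        self.conn.close()
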
def main():
    # batch processing for the videos in the dataset
    from dbimpl import DBImpl

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        if not os.path.exists(list_folder):
            continue
        print list_id

        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            video_path = os.path.join(list_folder, video_title + "_" + video_hash + ".mp4")
            if not os.path.exists(video_path):
                continue
            print video_path

            video = video_title + "_" + video_hash
            out_folder = os.path.join(images_dir, video)
            if os.path.exists(out_folder):
                continue
            os.mkdir(out_folder)

            extract_frames(video_path, out_folder=out_folder)
            diff_frames(out_folder)

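# --- Hedged sketch: extract_frames() used above is defined elsewhere in the
# repo. Under the assumption that it samples roughly one frame per second of
# video (consistent with the per-second loop in extract_frames(video_hash)
# further down), a stand-in would be:
import os, math, cv2

def extract_frames_sketch(video_path, out_folder):
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    for second in range(int(frame_count / fps)):
        # seek to the frame closest to this second and dump it as <second>.png
        video.set(cv2.CAP_PROP_POS_FRAMES, int(math.ceil(second * fps)))
        ok, frame = video.read()
        if not ok:
            break
        cv2.imwrite(os.path.join(out_folder, '%d.png' % second), frame)
    video.release()
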
def compare():
    out_dir1 = os.path.join(working_dir, "Phase1", "SSIM")
    out_dir2 = os.path.join(working_dir, "Phase1", "NRMSE")

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select hash, title, playlist from videos where hash = ?'

    with open("verified_videos.txt") as fin:
        for line in fin.readlines():
            video_hash = line.strip()
            video_hash, video_title, video_playlist = db.queryone(sql, video_hash)
            # print video_title, video_hash

            video = video_title.strip() + '_' + video_hash

            frame_folder = os.path.join(out_dir1, video)
            with open(os.path.join(frame_folder, 'frames.txt')) as fin2:
                frames1 = fin2.readline().split()

            frame_folder = os.path.join(out_dir2, video)
            with open(os.path.join(frame_folder, 'frames.txt')) as fin2:
                frames2 = fin2.readline().split()

            print len(set(frames1) - set(frames2)), len(set(frames2) - set(frames1))

def run(metric="SSIM", thre=0.05):
    out_dir = os.path.join(working_dir, "Phase1", metric)

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select hash, title, playlist from videos where hash = ?'

    with open("verified_videos.txt") as fin:
        for line in fin.readlines():
            video_hash = line.strip()
            video_hash, video_title, video_playlist = db.queryone(sql, video_hash)
            # print video_title, video_hash

            video = video_title.strip() + '_' + video_hash
            video_file = video + ".mp4"
            video_path = os.path.join(video_dir, video_playlist, video_file)
            if not os.path.exists(video_path):
                # fall back to the title-only file name
                video_file = video_title.strip() + ".mp4"
                video_path = os.path.join(video_dir, video_playlist, video_file)
            # print video_path

            out_folder = os.path.join(out_dir, video)
            if os.path.exists(out_folder):
                continue
            os.mkdir(out_folder)

            extract_frames(video_path, out_folder=out_folder)
            diff_frames(out_folder, thre=thre, metric=metric)

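# --- Hedged sketch: diff_frames() receives a threshold and a metric name
# ("SSIM" or "NRMSE") from run(), and compare()/stat() later read a
# space-separated frames.txt from each output folder. A plausible stand-in
# that keeps frames differing from their predecessor by more than the
# threshold (compare_ssim / compare_nrmse are the Python-2-era scikit-image
# APIs; the real diff_frames may differ):
import os, cv2
from skimage.measure import compare_ssim, compare_nrmse

def diff_frames_sketch(folder, thre=0.05, metric="SSIM"):
    names = sorted((f for f in os.listdir(folder) if f.endswith('.png')),
                   key=lambda f: int(f.split('.')[0]))
    kept, prev = [], None
    for name in names:
        img = cv2.imread(os.path.join(folder, name), cv2.IMREAD_GRAYSCALE)
        if prev is not None:
            diff = 1 - compare_ssim(prev, img) if metric == "SSIM" \
                else compare_nrmse(prev, img)
            if diff > thre:
                kept.append(name)
        prev = img
    with open(os.path.join(folder, 'frames.txt'), 'w') as fout:
        fout.write(' '.join(n.split('.')[0] for n in kept))
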
def batch():
    from dbimpl import DBImpl
    import preprocess
    from video_tagging.predict import predict_video, load_model

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    default_config = {
        'eps1': 3,
        'eps2': 2,
        'min_samples': 2,
        'line_ratio': 0.7
    }

    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    # valid_model was referenced but never defined in this snippet; assuming
    # the imported load_model() provides the frame-validity classifier
    valid_model = load_model()

    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        if not os.path.exists(list_folder):
            continue
        print list_id

        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            video_title = video_title.strip()
            video_folder = video_title + '_' + video_hash
            video_path = os.path.join(video_dir, list_id, video_folder + ".mp4")

            if not os.path.exists(os.path.join(images_dir, video_folder)):
                continue
            if not os.path.exists(os.path.join(images_dir, video_folder, 'predict.txt')):
                predict_video(video_folder, valid_model)
            if os.path.exists(os.path.join(crop_dir, video_folder)):
                continue

            cvideo = CVideo(video_folder, config=default_config)
            if len(cvideo.images) <= 0:
                continue

            if not os.path.exists(os.path.join(lines_dir, video_folder)):
                os.mkdir(os.path.join(lines_dir, video_folder))

            cvideo.cluster_lines()
            cvideo.adjust_lines()
            cvideo.detect_rects()
            print video_title, video_hash
            cvideo.crop_rects()

def main():
    from dbimpl import DBImpl

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        print list_id

        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            print video_title, video_hash
            google_ocr(video_title, video_hash)

def main():
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        print list_id

        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            video_title = video_title.strip()
            video_folder = video_title + '_' + video_hash
            OCR_noise(video_folder)

def main():
    with open("verified_videos.txt") as fin:
        process_hashes = [line.strip() for line in fin.readlines()]

    from dbimpl import DBImpl
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select a.hash, a.title from videos a, playlists b where a.playlist = b.id and a.used = 1 and b.used = 1'

    for r in db.querymany(sql):
        video_hash, video_name = r
        video_name = video_name.strip()
        ocr_folder = os.path.join(ocr_dir, video_name + "_" + video_hash)
        if video_hash in process_hashes:
            print ocr_folder
            parser = GoogleOCRParser(video_name, ocr_folder)
            parser.correct_words()

def parser_response():
    from dbimpl import DBImpl
    from setting import *

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = "insert into playlists(id, title, channel) values(?, ?, ?)"

    with open("youtube_search_list.json") as fin:
        result = json.load(fin)
        for item in result['items']:
            playlist_title = item['snippet']['title']
            channel = item['snippet']['channelId']
            playlist_id = item['id']['playlistId']
            print playlist_id, playlist_title, channel
            db.updateone(sql, playlist_id, playlist_title, channel)

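# For reference, the shape of one item in youtube_search_list.json as
# consumed above (YouTube Data API v3 playlist search result; the values
# here are made up):
# {
#   "id": {"kind": "youtube#playlist", "playlistId": "PL..."},
#   "snippet": {"title": "Java Tutorial for Beginners", "channelId": "UC..."}
# }
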
def main():
    import os, sys
    from dbimpl import DBImpl

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select title from videos where hash = ?'

    video_hash = 'jJjg4JweJZU'
    frame = 143
    # other examples:
    # video_hash, frame = 'o4Or0PMI_aI', 378
    # video_hash, frame = '6HydEu75iQI', 229
    # video_hash, frame = '6TIeyVWPvDY', 225
    # video_hash, frame = 'VKTEjBQzkgs', 37
    # video_hash, frame = 'KUdro0G1BV4', 81

    video_title = db.queryone(sql, video_hash)[0].strip()
    print video_title, video_hash

    video_folder = video_title + '_' + video_hash
    completed_path = os.path.join(images_dir, video_folder, '%d.png' % frame)

    img = cv2.imread(completed_path)
    cimg = CImage(img, name=video_folder)
    cimg.preprocess()
    # cimg.show()
    # cimg.cluster_lines()

    rects = cimg.find_contours(show=False)
    rects = sorted(rects, key=lambda x: x[2] * x[3], reverse=True)

    # draw the largest detected rectangle
    x, y, w, h = rects[0]
    cv2.rectangle(cimg.img, (x, y), (x + w, y + h), (0, 0, 255), 2)

    cv2.imshow('image', cimg.img)
    if cv2.waitKey(0) & 0xff == 27:  # Esc closes the window
        cv2.destroyAllWindows()

def batch_crop():
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        # skip playlists that were never downloaded
        if not os.path.exists(list_folder):
            continue
        print list_id

        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            video_title = video_title.strip()
            video_folder = video_title + '_' + video_hash
            print video_folder
            crop_noisy_frame(video_folder)

def stat_valid(video_hash):
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select title, playlist from videos where hash = ?'
    res = db.queryone(sql, video_hash)
    video_name = res[0].strip()

    # image_folder = '../public/Images/%s_%s' % (video_name, video_hash)
    image_folder = images_dir + '/%s_%s' % (video_name, video_hash)
    with open(os.path.join(image_folder, 'predict.json')) as fin:
        predict_info = json.load(fin)

    valid_count, invalid_count = 0, 0
    for f in predict_info:
        if predict_info[f]['label'] == 'valid':
            valid_count += 1
        else:
            invalid_count += 1
    print valid_count, invalid_count

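# Assumed shape of predict.json, inferred from the loop above (keys are frame
# file names, each mapped to a per-frame classification):
# {"143.png": {"label": "valid"}, "144.png": {"label": "invalid"}}
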
def download():
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select * from videos where playlist = ?'

    res = db.querymany(sql)
    video_folder = "/Volumes/Seagate/VideoAnalytics/Videos"
    for list_id, title in res:
        # skip playlists that already have videos recorded
        existing = db.querymany(sql2, list_id)
        if len(existing) > 0:
            print 'list has been downloaded', list_id
            continue
        print list_id, title

        playlist_url = "https://www.youtube.com/playlist?list=%s" % list_id
        output_folder = os.path.join(video_folder, list_id)
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)

        videos = download_youtube_list(playlist_url, output_folder)
        for idx, (video_hash, video_title) in enumerate(videos):
            insert_video(db, video_hash, video_title, list_id, idx + 1)

    db.close()

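# --- Hedged sketch: insert_video() is called above but defined elsewhere.
# Given the columns the other queries rely on (hash, title, playlist,
# list_order, used), it presumably does something like:
def insert_video_sketch(db, video_hash, title, playlist_id, order):
    sql = 'insert into videos(hash, title, playlist, list_order, used) values(?, ?, ?, ?, 1)'
    db.updateone(sql, video_hash, title, playlist_id, order)
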
def stat(metric="SSIM"):
    out_dir = os.path.join(working_dir, "Phase1", metric)

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select hash, title, playlist from videos where hash = ?'

    total = 0
    with open("verified_videos.txt") as fin:
        for line in fin.readlines():
            video_hash = line.strip()
            video_hash, video_title, video_playlist = db.queryone(sql, video_hash)
            # print video_title, video_hash

            video = video_title.strip() + '_' + video_hash
            frame_folder = os.path.join(out_dir, video)
            with open(os.path.join(frame_folder, 'frames.txt')) as fin2:
                frames = fin2.readline().split()
            print len(frames)
            total += len(frames)
    print total

class APIDBImpl:
    def __init__(self):
        self.dbimpl = DBImpl({
            "type": "mysql",
            "url": "127.0.0.1",
            "username": "******",
            "password": "******",
            "database": "link_api"
        })

    def query_records(self, entity):
        # strip a trailing parameter list, e.g. "substring(int beginIndex)" -> "substring"
        idx = entity.find('(')
        if idx > 0:
            entity = entity[0:idx].strip()
        sql = 'select * from link_api_record where name = %s'
        return self.dbimpl.querymany(sql, entity)

    def query_web_cache(self, link):
        sql = 'select * from web_cache where url = %s'
        return self.dbimpl.queryone(sql, link)

    def insert_or_update_cache(self, result):
        try:
            if not result[3]:
                sql = 'update web_cache set content=%s, access_time=%s where url=%s'
                self.dbimpl.updateone(sql, result[1], datetime.now(), result[2])
            else:
                sql = 'insert into web_cache(url, content) values(%s, %s)'
                self.dbimpl.updateone(sql, result[2], result[1])
        except Exception as e:
            print e

    def close(self):
        self.dbimpl.close()

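# Example round trip against the cache table above. The result tuple layout
# follows insert_or_update_cache: result[1] = page content, result[2] = url,
# result[3] = "is new" flag. The URL and content here are illustrative only.
if __name__ == '__main__':
    api_db = APIDBImpl()
    link = 'https://docs.oracle.com/javase/8/docs/api/java/util/List.html'
    cached = api_db.query_web_cache(link)
    content = '<html>...</html>'  # freshly fetched page body
    api_db.insert_or_update_cache((None, content, link, cached is None))
    api_db.close()
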
def get_db():
    db = getattr(g, '_database', None)
    if db is None:
        db = g._database = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    return db

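# Standard Flask counterpart to get_db(): close the cached connection when
# the application context tears down ('app' is assumed to be the Flask
# instance this snippet belongs to).
@app.teardown_appcontext
def close_db(exception):
    db = getattr(g, '_database', None)
    if db is not None:
        db.close()
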
import diff_match_patch as dmp
import ocr, re, sys, os, numpy, json
import cv2

sys.path.append('../../python')
from dbimpl import DBImpl
from setting import *

MIN_INTERVAL = 30

video_hash = sys.argv[1]

db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
sql = 'select title, playlist from videos where hash = ?'
res = db.queryone(sql, video_hash)
video_name = res[0].strip()
video_playlist = res[1].strip()

# vnum, fnum, fnumf = int(sys.argv[1]), 1, 1. #4321
# fps = [15.002999, 29.970030, 30, 23.976150, 30, 29.970030, 30.001780, 30, 29.970030, 29.970030, 30, 15, 23.976024, 30, 15, 30, 29.873960, 30, 15, 25.000918, 30][vnum-1] #...
# print 'starting with frame', fnum, '\n'

video_file = video_name + "_" + video_hash + ".mp4"
video_path = os.path.join(video_dir, video_playlist, video_file)
if not os.path.exists(video_path):
    video_file = video_name + ".mp4"
    video_path = os.path.join(video_dir, video_playlist, video_file)

video = cv2.VideoCapture(video_path)

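# Example of what the diff_match_patch import above is typically used for in
# this file: aligning two OCR'd versions of the same line. diff_main and
# diff_cleanupSemantic are the library's real entry points; the sample
# strings are made up.
#   d = dmp.diff_match_patch()
#   diffs = d.diff_main('pubic static void main', 'public static void main')
#   d.diff_cleanupSemantic(diffs)
#   # diffs is a list of (op, text) pairs: 0 = equal, -1 = delete, 1 = insert
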
import os, sys
import json
from dbimpl import DBImpl
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle

sys.path.append('..')
from setting import *
from util import correct_non_ascii
from OCR.adjust_ocr import GoogleOCRParser, diff_lines
from OCR.JavaLine import JavaLine
from OCR.lm import JAVA_WORDS, JAVA_LINE_STRUCTURE

db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
sql = 'select a.hash, a.title from videos a, playlists b where a.playlist = b.id and a.used = 1 and b.used = 1'
res = db.querymany(sql)

video_folders = []
video_hash_map = {}
for video_hash, video_title in res:
    video_folders.append((video_title.strip(), video_hash))
    video_hash_map[video_hash] = video_title


# baseline
def construct_index_with_noise():
    all_docs = []
    video_track = {}
    num = 0
    all_frame_docs = []
    frame_track = {}

import os, sys
import json
import cv2
from darkflow.net.build import TFNet
from json import JSONEncoder, JSONDecoder
import pickle
import numpy

sys.path.append('../')
from dbimpl import DBImpl
from setting import *

db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})


class PythonObjectEncoder(JSONEncoder):
    def default(self, obj):
        if isinstance(obj, (list, dict, str, unicode, int, float, bool, type(None))):
            return JSONEncoder.default(self, obj)
        elif isinstance(obj, numpy.floating):
            return float(obj)
        # anything else is pickled and wrapped so it survives the JSON round trip
        return {'_python_object': pickle.dumps(obj)}


def predict():
    options = {
        "model": "cfg/tiny-yolo-voc-1c.cfg",
        "load": -1,
        'threshold': 0.1
        # "gpu": 1.0
    }

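# Companion decoder hook for PythonObjectEncoder above, the usual counterpart
# to this pickle-in-JSON pattern:
#   def as_python_object(dct):
#       if '_python_object' in dct:
#           return pickle.loads(str(dct['_python_object']))
#       return dct
# usage: json.dumps(data, cls=PythonObjectEncoder)
#        json.loads(serialized, object_hook=as_python_object)
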
def extract_frames(video_hash):
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select title, playlist from videos where hash = ?'
    res = db.queryone(sql, video_hash)
    video_name = res[0].strip()
    video_playlist = res[1].strip()

    video_file = video_name + "_" + video_hash + ".mp4"
    video_path = os.path.join(video_dir, video_playlist, video_file)
    if not os.path.exists(video_path):
        video_file = video_name + ".mp4"
        video_path = os.path.join(video_dir, video_playlist, video_file)

    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    # fps = math.ceil(fps)
    # fps = 30
    frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
    print 'video fps/frame count:', fps, '/', frame_count

    extract_folder = '../public/Images/%s_%s' % (video_name, video_hash)
    main_folder = '../public/extracts/%s_%s/main' % (video_name, video_hash)

    second = 1
    filter_frames = []
    frame_seg = {}
    seg_frame = {}
    while True:
        # map the current second to its frame number
        frame_num = math.ceil(second * fps) + 1
        for seg in range(1, 4):
            file_path = os.path.join(main_folder, 'frame%d-segment%d.txt' % (frame_num, seg))
            print file_path
            if os.path.exists(file_path):
                # print 'found', frame_num
                if frame_num not in filter_frames:
                    filter_frames.append(frame_num)
                    frame_seg[frame_num] = [seg]
                else:
                    frame_seg[frame_num].append(seg)

                if seg in seg_frame:
                    seg_frame[seg].append(frame_num)
                else:
                    seg_frame[seg] = [frame_num]

        second += 1
        if frame_num > frame_count:
            break

    # print filter_frames
    # print [int(math.floor(f / fps)) for f in filter_frames]
    for f in frame_seg:
        if len(frame_seg[f]) > 1:
            print f
    for s in seg_frame:
        print s, len(seg_frame[s])
