Пример #1
0
def main():
    """Run ``predict_video`` on every used video of every used playlist.

    Loads the Keras model stored in ``weights.h5``, reads the used playlists
    and their used videos (ordered by ``list_order``) from ``videos.db`` and
    passes each ``<title>_<hash>`` name to ``predict_video`` together with
    the loaded model.
    """
    from dbimpl import DBImpl

    model = keras.models.load_model('weights.h5')
    print('finish loading model')

    print('%s %s' % (video_dir, images_dir))

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    # The playlist title is selected by the query but not needed here.
    for list_id, _title in db.querymany(sql):
        print(list_id)

        for video_hash, video_title in db.querymany(sql2, list_id):
            # predict_video receives the "<title>_<hash>" name, not a path.
            video = video_title + "_" + video_hash

            print(video)
            predict_video(video, model)
Пример #2
0
def main():  # batch processing for the videos in the dataset
    """Extract and diff frames for every used video that has not been done.

    A playlist is skipped when its folder under ``video_dir`` is missing, a
    video when its ``<title>_<hash>.mp4`` file is missing or when its output
    folder under ``images_dir`` already exists.
    """
    from dbimpl import DBImpl

    database = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    playlist_query = 'select id, title from playlists where used = 1'
    video_query = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    for playlist_id, _playlist_title in database.querymany(playlist_query):
        playlist_folder = os.path.join(video_dir, playlist_id)
        if os.path.exists(playlist_folder):
            print(playlist_id)

            for clip_hash, clip_title in database.querymany(video_query, playlist_id):
                clip_name = clip_title + "_" + clip_hash
                clip_path = os.path.join(playlist_folder, clip_name + ".mp4")
                if not os.path.exists(clip_path):
                    continue
                print(clip_path)

                frames_folder = os.path.join(images_dir, clip_name)
                if os.path.exists(frames_folder):
                    # An existing output folder means this video is skipped.
                    continue
                os.mkdir(frames_folder)

                extract_frames(clip_path, out_folder=frames_folder)
                diff_frames(frames_folder)
Пример #3
0
def compare():
    """Print how the SSIM and NRMSE frame lists differ for each verified video.

    For every hash listed in ``verified_videos.txt`` the video's title is
    looked up in ``videos.db``. The first line of ``frames.txt`` is then read
    from the video's folder under ``Phase1/SSIM`` and ``Phase1/NRMSE``, and
    one line with two numbers is printed: how many frames appear only in the
    SSIM list and how many appear only in the NRMSE list.
    """
    out_dir1 = os.path.join(working_dir, "Phase1", "SSIM")
    out_dir2 = os.path.join(working_dir, "Phase1", "NRMSE")

    def read_frames(metric_dir, video):
        # Only the first line of frames.txt is considered; readline() yields
        # '' for an empty file, so that case gives an empty set.
        with open(os.path.join(metric_dir, video, 'frames.txt')) as frames_file:
            return set(frames_file.readline().split())

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select hash, title, playlist from videos where hash = ?'
    with open("verified_videos.txt") as fin:
        for line in fin:
            video_hash = line.strip()
            if not video_hash:
                # A blank line (e.g. a trailing newline) is not a hash.
                continue
            video_hash, video_title, _playlist = db.queryone(sql, video_hash)

            video = video_title.strip() + '_' + video_hash
            frames1 = read_frames(out_dir1, video)
            frames2 = read_frames(out_dir2, video)

            print('%d %d' % (len(frames1 - frames2), len(frames2 - frames1)))
Пример #4
0
def run(metric="SSIM", thre=0.05):
    """Extract and diff frames of every verified video for one metric.

    Output goes to ``<working_dir>/Phase1/<metric>/<title>_<hash>``; a video
    whose output folder already exists is skipped.

    Args:
        metric: name of the sub-folder and the value forwarded to
            ``diff_frames`` as ``metric``.
        thre: value forwarded to ``diff_frames`` as ``thre``.
    """
    metric_dir = os.path.join(working_dir, "Phase1", metric)

    database = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    lookup = 'select hash, title, playlist from videos where hash = ?'

    with open("verified_videos.txt") as hash_file:
        for raw_line in hash_file.readlines():
            row = database.queryone(lookup, raw_line.strip())
            clip_hash, clip_title, clip_playlist = row
            clean_title = clip_title.strip()
            clip_name = clean_title + '_' + clip_hash

            clip_path = os.path.join(video_dir, clip_playlist,
                                     clip_name + ".mp4")
            if not os.path.exists(clip_path):
                # Fall back to a file named after the title alone.
                clip_path = os.path.join(video_dir, clip_playlist,
                                         clean_title + ".mp4")

            target_folder = os.path.join(metric_dir, clip_name)
            if os.path.exists(target_folder):
                continue
            os.mkdir(target_folder)

            extract_frames(clip_path, out_folder=target_folder)
            diff_frames(target_folder, thre=thre, metric=metric)
Пример #5
0
 def __init__(self):
     """Create the ``DBImpl`` handle for the ``link_api`` MySQL database."""
     connection_settings = {
         "type": "mysql",
         "url": "127.0.0.1",
         "username": "******",
         "password": "******",
         "database": "link_api"
     }
     self.dbimpl = DBImpl(connection_settings)
Пример #6
0
def batch():
    """Predict, cluster and crop the frames of every used video.

    For each used video whose image folder exists under ``images_dir``:
    run ``predict_video`` when ``predict.txt`` is missing, skip the video
    when its folder under ``crop_dir`` already exists or when ``CVideo``
    reports no images, otherwise make sure its folder under ``lines_dir``
    exists and run the ``CVideo`` line clustering, adjustment, rectangle
    detection and cropping steps.
    """
    from dbimpl import DBImpl
    import preprocess
    from video_tagging.predict import predict_video, load_model

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    default_config = {
        'eps1': 3,
        'eps2': 2,
        'min_samples': 2,
        'line_ratio': 0.7
    }

    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'
    for list_id, _title in db.querymany(sql):
        # Playlists without a folder under video_dir are skipped.
        if not os.path.exists(os.path.join(video_dir, list_id)):
            continue

        print(list_id)
        for video_hash, video_title in db.querymany(sql2, list_id):
            video_title = video_title.strip()
            video_folder = video_title + '_' + video_hash

            image_folder = os.path.join(images_dir, video_folder)
            if not os.path.exists(image_folder):
                continue
            if not os.path.exists(os.path.join(image_folder, 'predict.txt')):
                # NOTE(review): valid_model is not defined in this function
                # (load_model is imported but never called here); presumably
                # it is a module-level global -- confirm.
                predict_video(video_folder, valid_model)

            if os.path.exists(os.path.join(crop_dir, video_folder)):
                continue

            cvideo = CVideo(video_folder, config=default_config)
            if len(cvideo.images) <= 0:
                continue

            line_folder = os.path.join(lines_dir, video_folder)
            if not os.path.exists(line_folder):
                os.mkdir(line_folder)

            cvideo.cluster_lines()
            cvideo.adjust_lines()
            cvideo.detect_rects()

            print('%s %s' % (video_title, video_hash))
            cvideo.crop_rects()
Пример #7
0
def main():
    """Call ``google_ocr`` for every used video of every used playlist.

    Playlists and videos come from ``videos.db``; videos are visited in
    ``list_order`` and ``google_ocr`` receives the title and hash exactly as
    stored.
    """
    from dbimpl import DBImpl

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    # The playlist title is selected by the query but not needed here.
    for list_id, _title in db.querymany(sql):
        print(list_id)
        for video_hash, video_title in db.querymany(sql2, list_id):
            print('%s %s' % (video_title, video_hash))
            google_ocr(video_title, video_hash)
Пример #8
0
def main():
    """Call ``OCR_noise`` for every used video of every used playlist.

    Each video is identified by its ``<stripped title>_<hash>`` folder name;
    videos are visited in ``list_order``.
    """
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})

    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    # The playlist title is selected by the query but not needed here.
    for list_id, _title in db.querymany(sql):
        print(list_id)
        for video_hash, video_title in db.querymany(sql2, list_id):
            video_folder = video_title.strip() + '_' + video_hash

            OCR_noise(video_folder)
Пример #9
0
def main():
    """Run OCR word correction for every verified, used video.

    Hashes are read from ``verified_videos.txt``. Every used video of a used
    playlist whose hash is in that list gets a ``GoogleOCRParser`` built from
    its stripped title and its folder under ``ocr_dir``, and
    ``correct_words`` is called on it.
    """
    with open("verified_videos.txt") as fin:
        # A set makes the membership test below O(1) per video.
        process_hashes = set(line.strip() for line in fin)

    from dbimpl import DBImpl
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select a.hash, a.title from videos a, playlists b where a.playlist = b.id and a.used = 1 and b.used = 1'
    for video_hash, video_name in db.querymany(sql):
        if video_hash not in process_hashes:
            continue

        video_name = video_name.strip()
        ocr_folder = os.path.join(ocr_dir, video_name + "_" + video_hash)
        print(ocr_folder)
        GoogleOCRParser(video_name, ocr_folder).correct_words()
Пример #10
0
def parser_response():
    from dbimpl import DBImpl
    from setting import *
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})

    sql = "insert into playlists(id, title, channel) values(?, ?, ?)"

    with open("youtube_search_list.json") as fin:
        result = json.load(fin)

        for item in result['items']:
            playlist_title = item['snippet']['title']
            channel = item['snippet']['channelId']
            playlist_id = item['id']['playlistId']

            print playlist_id, playlist_title, channel
            db.updateone(sql, playlist_id, playlist_title, channel)
Пример #11
0
def main():
    """Display one stored frame with its largest detected rectangle outlined.

    The frame is ``<images_dir>/<title>_<hash>/<frame>.png`` for the
    hard-coded hash and frame number below. Rectangles come from
    ``CImage.find_contours``; the one with the largest width * height is
    drawn onto the image, which is then shown until a key is pressed.
    """
    import os, sys
    from dbimpl import DBImpl

    # Frame under inspection. Other (hash, frame) samples kept for reference:
    #   'o4Or0PMI_aI' / 378, '6HydEu75iQI' / 229, '6TIeyVWPvDY' / 225,
    #   'VKTEjBQzkgs' / 37, 'KUdro0G1BV4' / 81
    video_hash = 'jJjg4JweJZU'
    frame = 143

    database = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    title_query = 'select title from videos where hash = ?'
    row = database.queryone(title_query, video_hash)
    video_title = row[0].strip()
    print('%s %s' % (video_title, video_hash))

    video_folder = video_title + '_' + video_hash
    frame_path = os.path.join(images_dir, video_folder, '%d.png'%frame)

    cimg = CImage(cv2.imread(frame_path), name=video_folder)
    cimg.preprocess()
    candidates = cimg.find_contours(show=False)

    def area(rect):
        # rect unpacks as (x, y, w, h) below.
        return rect[2]*rect[3]

    largest_first = sorted(candidates, key=area, reverse=True)
    x, y, w, h = largest_first[0]
    top_left = (x, y)
    bottom_right = (x+w, y+h)
    cv2.rectangle(cimg.img, top_left, bottom_right, (0, 0, 255), 2)

    cv2.imshow('image', cimg.img)
    pressed = cv2.waitKey(0) & 0xff
    if pressed == 27:
        # Key code 27 closes the windows explicitly.
        cv2.destroyAllWindows()
Пример #12
0
def batch_crop():
    """Call ``crop_noisy_frame`` for the used videos of selected playlists.

    Each video is identified by its ``<stripped title>_<hash>`` folder name;
    videos are visited in ``list_order``.
    """
    database = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})

    playlist_query = 'select id, title from playlists where used = 1'
    video_query = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'

    for playlist_id, _playlist_title in database.querymany(playlist_query):
        # NOTE(review): playlists whose folder under video_dir EXISTS are
        # skipped, so only playlists without a folder are processed. Confirm
        # this is intended and not an inverted check.
        if os.path.exists(os.path.join(video_dir, playlist_id)):
            continue

        print(playlist_id)
        for clip_hash, clip_title in database.querymany(video_query, playlist_id):
            clip_folder = clip_title.strip() + '_' + clip_hash
            print(clip_folder)

            crop_noisy_frame(clip_folder)
Пример #13
0
def stat_valid(video_hash):
    """Print how many predicted frames of a video are labelled valid / not.

    Reads ``predict.json`` from ``<images_dir>/<title>_<hash>`` and prints
    two numbers: entries whose ``label`` is ``'valid'`` and all the others.

    Args:
        video_hash: hash used to look the video's title up in ``videos.db``.
    """
    database = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    row = database.queryone(
        'select title, playlist from videos where hash = ?', video_hash)
    title = row[0].strip()

    folder = images_dir + '/%s_%s' % (title, video_hash)

    with open(os.path.join(folder, 'predict.json')) as fin:
        predictions = json.load(fin)

    labels = [predictions[key]['label'] for key in predictions]
    valid_total = labels.count('valid')
    invalid_total = len(labels) - valid_total

    print('%d %d' % (valid_total, invalid_total))
Пример #14
0
def download():
    """Download every used playlist that has no videos recorded yet.

    A playlist that already has rows in ``videos`` is reported and skipped.
    Otherwise its folder is created under the hard-coded video root if
    needed, ``download_youtube_list`` is called, and every returned
    ``(hash, title)`` pair is stored through ``insert_video`` with its
    1-based position in the list. The database handle is closed even when a
    download or insert raises.
    """
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select * from videos where playlist = ?'

    video_folder = "/Volumes/Seagate/VideoAnalytics/Videos"
    try:
        for list_id, list_title in db.querymany(sql):
            existing = db.querymany(sql2, list_id)
            if len(existing) > 0:
                print('list has been downloaded %s' % list_id)
                continue

            print('%s %s' % (list_id, list_title))
            playlist_url = "https://www.youtube.com/playlist?list=%s" % list_id

            output_folder = os.path.join(video_folder, list_id)
            if not os.path.exists(output_folder):
                os.mkdir(output_folder)

            videos = download_youtube_list(playlist_url, output_folder)

            for position, (video_hash, video_title) in enumerate(videos, 1):
                insert_video(db, video_hash, video_title, list_id, position)
    finally:
        db.close()
Пример #15
0
def stat(metric="SSIM"):
    """Print the number of selected frames per verified video and the total.

    For every hash in ``verified_videos.txt`` the first line of
    ``<working_dir>/Phase1/<metric>/<title>_<hash>/frames.txt`` is split on
    whitespace and the number of items is printed; the sum over all videos
    is printed last.

    Args:
        metric: name of the sub-folder under ``Phase1`` to read from.
    """
    metric_dir = os.path.join(working_dir, "Phase1", metric)

    database = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    lookup = 'select hash, title, playlist from videos where hash = ?'

    running_total = 0
    with open("verified_videos.txt") as hash_file:
        for raw_line in hash_file.readlines():
            clip_hash, clip_title, _playlist = database.queryone(
                lookup, raw_line.strip())

            clip_folder = os.path.join(metric_dir,
                                       clip_title.strip() + '_' + clip_hash)

            with open(os.path.join(clip_folder, 'frames.txt')) as frames_file:
                first_line = frames_file.readlines()[0]

            frame_total = len(first_line.split())
            print(frame_total)
            running_total += frame_total
    print(running_total)
Пример #16
0
class APIDBImpl:
    """Query helpers for the ``link_api`` MySQL database, built on ``DBImpl``.

    Every method forwards a parameterised SQL statement (``%s``
    placeholders) to the wrapped ``DBImpl`` instance.
    """

    def __init__(self):
        """Create the ``DBImpl`` handle for the ``link_api`` database."""
        self.dbimpl = DBImpl({
            "type": "mysql",
            "url": "127.0.0.1",
            "username": "******",
            "password": "******",
            "database": "link_api"
        })

    def query_records(self, entity):
        """Return the ``link_api_record`` rows whose name matches *entity*.

        Everything from the first ``(`` onwards is cut off and the rest is
        stripped before the lookup. A ``(`` in the very first position is
        left alone, so the name is never reduced to an empty string.
        """
        idx = entity.find('(')
        if idx > 0:
            entity = entity[:idx].strip()

        sql = 'select * from link_api_record where name = %s'
        return self.dbimpl.querymany(sql, entity)

    def query_web_cache(self, link):
        """Return the ``web_cache`` row stored for the URL *link*."""
        sql = 'select * from web_cache where url = %s'
        return self.dbimpl.queryone(sql, link)

    def insert_or_update_cache(self, result):
        """Store page content in ``web_cache``.

        *result* is indexed positionally: ``result[1]`` is the content,
        ``result[2]`` the URL and ``result[3]`` a flag. A falsy flag updates
        the existing row (also setting ``access_time`` to now); a truthy flag
        inserts a new row. Any exception is printed and swallowed, so cache
        writes are best-effort.
        """
        try:
            if not result[3]:
                sql = 'update web_cache set content=%s, access_time=%s where url=%s'
                self.dbimpl.updateone(sql, result[1], datetime.now(),
                                      result[2])
            else:
                sql = 'insert web_cache(url, content) values(%s, %s)'
                self.dbimpl.updateone(sql, result[2], result[1])
        except Exception as e:
            # print(e) parses on Python 2 and 3 alike and prints the same text.
            print(e)

    def close(self):
        """Close the underlying ``DBImpl`` handle."""
        self.dbimpl.close()
Пример #17
0
def get_db():
    """Return the ``DBImpl`` handle cached on ``g``, creating it on first use.

    The handle is stored as ``g._database`` so later calls reuse it.
    """
    cached = getattr(g, '_database', None)
    if cached is not None:
        return cached

    cached = DBImpl(
        {'url': os.path.join(playlists_dir, 'videos.db')})
    g._database = cached
    return cached
Пример #18
0
import diff_match_patch as dmp
import ocr, re, sys, numpy, json
import cv2
sys.path.append('../../python')
from dbimpl import DBImpl
from setting import *

# NOTE(review): MIN_INTERVAL is not used in the visible part of this script;
# presumably it is read further down -- confirm.
MIN_INTERVAL = 30

# The video to process is selected by the hash given as the first
# command-line argument.
video_hash = sys.argv[1]

# Look up the title and playlist recorded for that hash.
db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
sql = 'select title, playlist from videos where hash = ?'
res = db.queryone(sql, video_hash)
video_name = res[0].strip()
video_playlist = res[1].strip()

# This value is overwritten a few lines below, before it is read.
video_file = video_name + "_" + video_hash

# vnum, fnum, fnumf = int(sys.argv[1]), 1, 1. #4321
# fps = [15.002999, 29.970030, 30, 23.976150, 30, 29.970030, 30.001780, 30, 29.970030, 29.970030, 30, 15, 23.976024, 30, 15, 30, 29.873960, 30, 15, 25.000918, 30][vnum-1]
#... print 'starting with frame', fnum, '\n'

# Preferred file name is "<title>_<hash>.mp4" inside the playlist folder.
video_file = video_name + "_" + video_hash + ".mp4"
video_path = os.path.join(video_dir, video_playlist, video_file)

# Fall back to "<title>.mp4" when the preferred file does not exist.
if(not os.path.exists(video_path)):
    video_file = video_name + ".mp4"
    video_path = os.path.join(video_dir, video_playlist, video_file)

# Open the chosen file for reading with OpenCV.
video = cv2.VideoCapture(video_path)
Пример #19
0
import os
import sys
import json
from dbimpl import DBImpl
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
sys.path.append('..')
from setting import *
from util import correct_non_ascii
from OCR.adjust_ocr import GoogleOCRParser, diff_lines
from OCR.JavaLine import JavaLine
from OCR.lm import JAVA_WORDS, JAVA_LINE_STRUCTURE

# Load the (hash, title) pair of every used video that belongs to a used
# playlist.
db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
sql = 'select a.hash, a.title from videos a, playlists b where a.playlist = b.id and a.used = 1 and b.used = 1'
res = db.querymany(sql)

# (stripped title, hash) pairs in query order.
video_folders = []
# hash -> title exactly as stored (not stripped, unlike video_folders).
video_hash_map = {}
for video_hash, video_title in res:
    video_folders.append((video_title.strip(), video_hash))
    video_hash_map[video_hash] = video_title


# baseline
def construct_index_with_noise():
    """Set up the accumulators for the baseline index.

    NOTE(review): only the opening of this function is visible here. It
    creates empty containers and a counter; how they are filled and what is
    returned cannot be determined from this excerpt.
    """
    all_docs = []
    video_track = {}
    num = 0

    all_frame_docs = []
    frame_track = {}
Пример #20
0
import os, sys
import json
import cv2
from darkflow.net.build import TFNet
from json import JSONEncoder, JSONDecoder
import pickle
import numpy

sys.path.append('../')
from dbimpl import DBImpl
from setting import *



# Module-level DBImpl handle for videos.db under playlists_dir.
db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})

class PythonObjectEncoder(JSONEncoder):
    """JSON encoder that falls back to pickling objects ``json`` cannot encode.

    NumPy floating-point scalars are emitted as plain floats. Any other
    object that reaches ``default`` is stored as
    ``{'_python_object': <pickle text>}``.

    NOTE(review): reading such a value back requires ``pickle.loads``, which
    must never be applied to JSON coming from an untrusted source.
    """

    # Types the json module encodes by itself. ``type(u'')`` is ``unicode``
    # on Python 2 and ``str`` on Python 3, so no Python-2-only name is needed.
    _NATIVE_TYPES = (list, dict, str, type(u''), int, float, bool, type(None))

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Raises:
            TypeError: from the base class, when *obj* is one of the natively
                supported types.
        """
        if isinstance(obj, self._NATIVE_TYPES):
            return JSONEncoder.default(self, obj)
        if isinstance(obj, numpy.floating):
            return float(obj)

        pickled = pickle.dumps(obj)
        if not isinstance(pickled, str):
            # Python 3: pickle.dumps returns bytes, which json cannot encode
            # (returning them would send the encoder straight back into this
            # method). latin-1 maps every byte to one code point, so
            # ``text.encode('latin-1')`` restores the exact pickle.
            pickled = pickled.decode('latin-1')
        return {'_python_object': pickled}


def predict():
    """Prepare the option dictionary for the detector.

    NOTE(review): only the opening of this function is visible here. It
    builds ``options``; presumably they are passed to ``TFNet`` further
    down -- confirm against the full file.
    """
    options = {"model": "cfg/tiny-yolo-voc-1c.cfg",
            "load": -1,
            'threshold': 0.1
            #    "gpu": 1.0
            }
Пример #21
0
def extract_frames(video_hash):
    """Report which per-second frames of a video have segment text files.

    The video's title and playlist are looked up in ``videos.db`` and the
    file is opened only to read its frame rate and frame count. For every
    whole second the frame number ``ceil(second * fps) + 1`` is computed and
    the files ``frame<N>-segment<S>.txt`` (S = 1..3) are probed under
    ``../public/extracts/<title>_<hash>/main``. Every probed path is printed,
    then each frame that has more than one segment file, then the number of
    frames found per segment.

    Args:
        video_hash: hash of the video in the ``videos`` table.
    """
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select title, playlist from videos where hash = ?'
    res = db.queryone(sql, video_hash)
    video_name = res[0].strip()
    video_playlist = res[1].strip()

    video_file = video_name + "_" + video_hash + ".mp4"
    video_path = os.path.join(video_dir, video_playlist, video_file)

    if not os.path.exists(video_path):
        # Fall back to a file named after the title alone.
        video_file = video_name + ".mp4"
        video_path = os.path.join(video_dir, video_playlist, video_file)

    video = cv2.VideoCapture(video_path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
    finally:
        # Only the two properties are needed; release the capture right away.
        video.release()
    print('video fps/frame count: %s / %s' % (fps, frame_count))

    if not fps > 0:
        # With a non-positive (or NaN) frame rate the frame number below
        # would never advance, so the loop could not terminate.
        print('cannot determine frame rate: %s' % video_path)
        return

    main_folder = '../public/extracts/%s_%s/main' % (video_name, video_hash)

    frame_seg = {}  # frame number -> segments that have a file
    seg_frame = {}  # segment -> frame numbers that have a file
    second = 1
    while True:
        frame_num = int(math.ceil(second * fps)) + 1
        for seg in range(1, 4):
            file_path = os.path.join(
                main_folder, 'frame%d-segment%d.txt' % (frame_num, seg))
            print(file_path)
            if os.path.exists(file_path):
                frame_seg.setdefault(frame_num, []).append(seg)
                seg_frame.setdefault(seg, []).append(frame_num)

        second += 1

        # The first frame number past the end of the video is still probed
        # above before the loop stops.
        if frame_num > frame_count:
            break

    for f in frame_seg:
        if len(frame_seg[f]) > 1:
            print(f)

    for s in seg_frame:
        print('%s %s' % (s, len(seg_frame[s])))