def video_init(opt, path):
    fps, endtime, height, width = ffmpeg.get_video_infos(path)
    if opt.fps != 0:
        fps = opt.fps

    # resume an unfinished run if extracted frames are still in temp_dir
    continue_flag = False
    imagepaths = []
    if os.path.isdir(opt.temp_dir):
        imagepaths = os.listdir(opt.temp_dir + '/video2image')
        if imagepaths != []:
            imagepaths.sort()
            last_frame = int(imagepaths[-1][7:13])
            if (opt.last_time != '00:00:00' and last_frame > fps * (util.stamp2second(opt.last_time) - 1)) \
                    or (opt.last_time == '00:00:00' and last_frame > fps * (endtime - 1)):
                choose = input('There is an unfinished video. Continue it? [y/n] ')
                if choose.lower() == 'yes' or choose.lower() == 'y':
                    continue_flag = True

    if not continue_flag:
        print('Step:1/4 -- Convert video to images')
        util.file_init(opt)
        ffmpeg.video2voice(path, opt.temp_dir + '/voice_tmp.mp3', opt.start_time, opt.last_time)
        ffmpeg.video2image(path, opt.temp_dir + '/video2image/output_%06d.' + opt.tempimage_type,
                           fps, opt.start_time, opt.last_time)
        imagepaths = os.listdir(opt.temp_dir + '/video2image')
        imagepaths.sort()

    return fps, imagepaths, height, width
def process_video(videopath, savedir, min_interval_time=0.1, crop_mode='peak',
                  crop_time=0.2, rate=44100, fc=[20, 8000], saveimage=True):
    """
    videopath:         path of the input video
    savedir:           directory for temporary and output files
    min_interval_time: minimum time (s) between two detected peaks
    crop_mode:         'peak' | 'time'
    crop_time:         crop window length (s) when crop_mode == 'time'
    rate:              audio sampling rate (Hz)
    fc:                band-pass cut-off frequencies [low, high] (Hz)
    saveimage:         also extract video frames as images
    return:            audio, syllables, features, peakindexs, bias,
                       video_infos (fps, endtime, height, width)
    """
    util.makedirs(savedir)

    # process video
    video_infos = ffmpeg.get_video_infos(videopath)
    ffmpeg.video2voice(videopath, os.path.join(savedir, 'video_tmp.wav'), samplingrate=44100)
    if saveimage:
        util.makedirs(os.path.join(savedir, 'imgs'))
        ffmpeg.video2image(videopath, os.path.join(savedir, 'imgs', '%05d.png'))

    # process audio
    audio, syllables, features, peakindexs, bias = process_audio(
        os.path.join(savedir, 'video_tmp.wav'), savedir,
        min_interval_time, crop_mode, crop_time, rate, fc)

    np.save(os.path.join(savedir, 'video_infos.npy'), np.array(video_infos))
    return audio, syllables, features, peakindexs, bias, video_infos
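# A minimal usage sketch for process_video; the paths below are hypothetical,
# not from the source. The function also writes peakindexs.npy / bias.npy /
# syllables.npy / features.npy / video_infos.npy into savedir, which can be
# reloaded later with np.load:
#
#   audio, syllables, features, peaks, bias, infos = process_video(
#       './videos/example.mp4', './tmp/example', crop_mode='peak')
#   peaks = np.load('./tmp/example/peakindexs.npy')
#   infos = np.load('./tmp/example/video_infos.npy')  # fps, endtime, height, width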
def video_init(opt, path):
    fps, endtime, height, width = ffmpeg.get_video_infos(path)
    if opt.fps != 0:
        fps = opt.fps

    # resume
    if os.path.isfile(os.path.join(opt.temp_dir, 'step.json')):
        step = util.loadjson(os.path.join(opt.temp_dir, 'step.json'))
        if int(step['step']) >= 1:
            choose = input('There is an unfinished video. Continue it? [y/n] ')
            if choose.lower() == 'yes' or choose.lower() == 'y':
                imagepaths = os.listdir(opt.temp_dir + '/video2image')
                imagepaths.sort()
                return fps, imagepaths, height, width

    print('Step:1/4 -- Convert video to images')
    util.file_init(opt)
    ffmpeg.video2voice(path, opt.temp_dir + '/voice_tmp.mp3', opt.start_time, opt.last_time)
    ffmpeg.video2image(path, opt.temp_dir + '/video2image/output_%06d.' + opt.tempimage_type,
                       fps, opt.start_time, opt.last_time)
    imagepaths = os.listdir(opt.temp_dir + '/video2image')
    imagepaths.sort()

    step = {'step': 2, 'frame': 0}
    util.savejson(os.path.join(opt.temp_dir, 'step.json'), step)
    return fps, imagepaths, height, width
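# A minimal call sketch for video_init. The opt fields mirror the ones the
# function reads; the concrete values are hypothetical, not from the source.
from argparse import Namespace

opt = Namespace(fps=0, temp_dir='./tmp', tempimage_type='jpg',
                start_time='00:00:00', last_time='00:00:00')
fps, imagepaths, height, width = video_init(opt, './videos/example.mp4')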
def process_audio(audiopath, savedir, min_interval_time=0.1, crop_mode='peak',
                  crop_time=0.2, rate=44100, fc=[20, 8000], hpss=''):
    util.makedirs(savedir)

    # convert to wav if needed
    if (os.path.splitext(audiopath)[1]).lower() != '.wav':
        ffmpeg.video2voice(audiopath, os.path.join(savedir, 'video_tmp.wav'), samplingrate=44100)
        audiopath = os.path.join(savedir, 'video_tmp.wav')
    _, audio = sound.load(audiopath, ch=0)
    _audio = audio.copy()

    # short-time energy (4410-sample frame = 100 ms, 441-sample hop = 10 ms at 44.1 kHz),
    # optionally on the harmonic or percussive component only
    if hpss == 'harmonic':
        harmonic, percussive = librosa.effects.hpss(_audio)
        energy = dsp.energy(sound.filter(harmonic, fc, rate), 4410, 441, 4410)
    elif hpss == 'percussive':
        harmonic, percussive = librosa.effects.hpss(_audio)
        energy = dsp.energy(sound.filter(percussive, fc, rate), 4410, 441, 4410)
    else:
        energy = dsp.energy(sound.filter(_audio, fc, rate), 4410, 441, 4410)

    peakindexs = arrop.findpeak(energy, interval=int(min_interval_time * 100))
    y = arrop.get_y(peakindexs, energy)
    plt.plot(energy)
    plt.scatter(peakindexs, y, c='orange')
    plt.show()

    peakindexs = peakindexs * 441  # energy-frame index -> sample index

    # decide the crop window around each peak
    bias = []
    if crop_mode == 'peak':
        # crop between the neighbouring energy valleys; note this assumes every
        # peak lies between two valleys, otherwise it gets no bias entry
        valleyindexs = arrop.findpeak(energy, interval=int(min_interval_time * 100), reverse=True) * 441
        for i in range(len(peakindexs)):
            for j in range(len(valleyindexs) - 1):
                if valleyindexs[j] < peakindexs[i] < valleyindexs[j + 1]:
                    left = np.clip(peakindexs[i] - valleyindexs[j],
                                   int(min_interval_time * rate * 0.5),
                                   int(min_interval_time * rate * 5))
                    right = np.clip(valleyindexs[j + 1] - peakindexs[i],
                                    int(min_interval_time * rate * 0.5),
                                    int(min_interval_time * rate * 5))
                    bias.append([left, right])
    elif crop_mode == 'time':
        for i in range(len(peakindexs)):
            bias.append([int(rate * crop_time / 2), int(rate * crop_time / 2)])

    # cut out each syllable and compute its frequency features
    syllables = []
    features = []
    for i in range(len(peakindexs)):
        syllable = audio[peakindexs[i] - bias[i][0]:peakindexs[i] + bias[i][1]]
        syllables.append(syllable)
        features.append(sound.freqfeatures(syllable, 44100))

    # save
    np.save(os.path.join(savedir, 'peakindexs.npy'), np.array(peakindexs))
    np.save(os.path.join(savedir, 'bias.npy'), np.array(bias))
    np.save(os.path.join(savedir, 'syllables.npy'), np.array(syllables))
    np.save(os.path.join(savedir, 'features.npy'), np.array(features))
    # for syllable in syllables:
    #     sound.playtest(syllable)

    return audio, syllables, features, peakindexs, bias
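# arrop.findpeak is project-local; an equivalent peak/valley picker can be
# sketched with scipy as a stand-in (not the author's implementation). With a
# 10 ms energy hop, distance=10 corresponds to interval=int(0.1*100) above:
import numpy as np
from scipy.signal import find_peaks

energy = np.random.rand(1000)                  # toy energy envelope, 10 ms per frame
peaks, _ = find_peaks(energy, distance=10)     # peaks at least 0.1 s apart
valleys, _ = find_peaks(-energy, distance=10)  # valleys are peaks of the negated signal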
def video_init(opt, path):
    util.clean_tempfiles()
    fps = ffmpeg.get_video_infos(path)[0]
    ffmpeg.video2voice(path, './tmp/voice_tmp.mp3')
    ffmpeg.video2image(path, './tmp/video2image/output_%05d.' + opt.tempimage_type)
    imagepaths = os.listdir('./tmp/video2image')
    imagepaths.sort()
    return fps, imagepaths
def video_init(opt, path):
    util.clean_tempfiles()
    fps, endtime, height, width = ffmpeg.get_video_infos(path)
    if opt.fps != 0:
        fps = opt.fps
    ffmpeg.video2voice(path, './tmp/voice_tmp.mp3')
    ffmpeg.video2image(path, './tmp/video2image/output_%05d.' + opt.tempimage_type, fps)
    imagepaths = os.listdir('./tmp/video2image')
    imagepaths.sort()
    return fps, imagepaths, height, width
def video_init(opt, path):
    util.clean_tempfiles(opt)
    fps, endtime, height, width = ffmpeg.get_video_infos(path)
    if opt.fps != 0:
        fps = opt.fps
    ffmpeg.video2voice(path, opt.temp_dir + '/voice_tmp.mp3', opt.start_time, opt.last_time)
    ffmpeg.video2image(path, opt.temp_dir + '/video2image/output_%06d.' + opt.tempimage_type,
                       fps, opt.start_time, opt.last_time)
    imagepaths = os.listdir(opt.temp_dir + '/video2image')
    imagepaths.sort()
    return fps, imagepaths, height, width
def cleanmosaic_video_byframe(opt):
    netG = loadmodel.pix2pix(opt)
    net_mosaic_pos = loadmodel.unet_clean(opt)
    path = opt.media_path
    util.clean_tempfiles()
    fps = ffmpeg.get_video_infos(path)[0]
    ffmpeg.video2voice(path, './tmp/voice_tmp.mp3')
    ffmpeg.video2image(path, './tmp/video2image/output_%05d.' + opt.tempimage_type)
    positions = []
    imagepaths = os.listdir('./tmp/video2image')
    imagepaths.sort()

    # get position
    for imagepath in imagepaths:
        img_origin = impro.imread(os.path.join('./tmp/video2image', imagepath))
        x, y, size = runmodel.get_mosaic_position(img_origin, net_mosaic_pos, opt)[:3]
        positions.append([x, y, size])
        print('Find mosaic location:', imagepath)
    print('Optimize mosaic locations...')
    positions = np.array(positions)
    for i in range(3):
        positions[:, i] = filt.medfilt(positions[:, i], opt.medfilt_num)

    # clean mosaic
    for i, imagepath in enumerate(imagepaths, 0):
        x, y, size = positions[i][0], positions[i][1], positions[i][2]
        img_origin = impro.imread(os.path.join('./tmp/video2image', imagepath))
        img_result = img_origin.copy()
        if size != 0:
            img_mosaic = img_origin[y - size:y + size, x - size:x + size]
            img_fake = runmodel.run_pix2pix(img_mosaic, netG, opt)
            img_result = impro.replace_mosaic(img_origin, img_fake, x, y, size, opt.no_feather)
        cv2.imwrite(os.path.join('./tmp/replace_mosaic', imagepath), img_result)
        print('Clean Mosaic:', imagepath)
    ffmpeg.image2video(
        fps,
        './tmp/replace_mosaic/output_%05d.' + opt.tempimage_type,
        './tmp/voice_tmp.mp3',
        os.path.join(opt.result_dir, os.path.splitext(os.path.basename(path))[0] + '_clean.mp4'))
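# Why the positions are median-filtered: single-frame detection misses produce
# spikes in (x, y, size) that would make the cleaned region jump between
# frames. A sketch of that smoothing step with scipy (filt.medfilt is
# project-local; the toy values are hypothetical):
import numpy as np
from scipy.signal import medfilt

positions = np.array([[100, 80, 30], [102, 81, 0], [101, 80, 31]], dtype=float)
for i in range(3):
    positions[:, i] = medfilt(positions[:, i], kernel_size=3)  # odd kernel, like medfilt_num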
def addmosaic_video(opt):
    net = loadmodel.unet(opt)
    path = opt.media_path
    util.clean_tempfiles()
    fps = ffmpeg.get_video_infos(path)[0]
    ffmpeg.video2voice(path, './tmp/voice_tmp.mp3')
    ffmpeg.video2image(path, './tmp/video2image/output_%05d.' + opt.tempimage_type)
    imagepaths = os.listdir('./tmp/video2image')
    imagepaths.sort()

    # get position
    positions = []
    for imagepath in imagepaths:
        print('Find ROI location:', imagepath)
        img = impro.imread(os.path.join('./tmp/video2image', imagepath))
        mask, x, y, area = runmodel.get_ROI_position(img, net, opt)
        positions.append([x, y, area])
        cv2.imwrite(os.path.join('./tmp/ROI_mask', imagepath), mask)
    print('Optimize ROI locations...')
    mask_index = filt.position_medfilt(np.array(positions), 7)

    # add mosaic
    print('Add mosaic to images...')
    for i in range(len(imagepaths)):
        mask = impro.imread(os.path.join('./tmp/ROI_mask', imagepaths[mask_index[i]]))
        img = impro.imread(os.path.join('./tmp/video2image', imagepaths[i]))
        img = mosaic.addmosaic(img, mask, opt)
        cv2.imwrite(os.path.join('./tmp/addmosaic_image', imagepaths[i]), img)
    ffmpeg.image2video(
        fps,
        './tmp/addmosaic_image/output_%05d.' + opt.tempimage_type,
        './tmp/voice_tmp.mp3',
        os.path.join(opt.result_dir, os.path.splitext(os.path.basename(path))[0] + '_add.mp4'))
STEP = ['generate_video']  # 'preprocess','matchtest','generate_video' or 'full'
FPS = 24
CorrectFreq = False
CorrectEnergy = True
ShowPeak = True
SamplingRate = 44100
IntervalTime = 0.03

# video2voice
if 'preprocess' in STEP or 'full' in STEP:
    util.clean_tempfiles(tmp_init=True)
    names = os.listdir(video_dir)
    for i, name in enumerate(names, 0):
        video_path = os.path.join(video_dir, name)
        # note: str.replace swaps every 'mp4' substring, not just the extension
        ffmpeg.video2voice(video_path,
                           os.path.join('./tmp/video_voice', name.replace('mp4', 'wav')),
                           'wav')
        img_dir = os.path.join('./tmp/video_imgs', '%02d' % i)
        util.makedirs(img_dir)
        ffmpeg.video2image(video_path, os.path.join(img_dir, '%05d.jpg'))
    ffmpeg.video2voice(music_path, './tmp/music/music.wav', 'wav')

if 'matchtest' in STEP or 'generate_video' in STEP or 'full' in STEP:
    '''
    dst crop and get features
    '''
    print('loading...')
    names = os.listdir('./tmp/video_voice')
    names.sort()
if util.is_img(path):
    img_origin = cv2.imread(path)
    x, y, size = get_mosaic_position(img_origin)
    img_result = img_origin.copy()
    if size != 0:
        img_mosaic = img_origin[y - size:y + size, x - size:x + size]
        img_fake = runmodel.run_pix2pix(img_mosaic, netG, use_gpu=opt.use_gpu)
        img_result = replace_mosaic(img_origin, img_fake, x, y, size)
    cv2.imwrite(os.path.join(opt.result_dir, os.path.basename(path)), img_result)
elif util.is_video(path):
    util.clean_tempfiles()
    fps = ffmpeg.get_video_infos(path)[0]
    ffmpeg.video2voice(path, './tmp/voice_tmp.mp3')
    ffmpeg.video2image(path, './tmp/video2image/output_%05d.' + opt.tempimage_type)
    positions = []
    imagepaths = os.listdir('./tmp/video2image')
    imagepaths.sort()
    for imagepath in imagepaths:
        imagepath = os.path.join('./tmp/video2image', imagepath)
        img_origin = cv2.imread(imagepath)
        x, y, size = get_mosaic_position(img_origin)
        positions.append([x, y, size])
        print('Find Positions:', imagepath)
    positions = np.array(positions)
    for i in range(3):
        positions[:, i] = signal.medfilt(positions[:, i], opt.medfilt_num)
import os
import random
import time

import numpy as np
import scipy.signal
from scipy.io import wavfile
import matplotlib.pylab as plt
import librosa

from util import util, ffmpeg, dsp, sound, notation

dataset = './dataset/诸葛亮'
video_names = os.listdir(dataset)
video_names.sort()
util.clean_tempfiles(tmp_init=False)
util.makedirs('./tmp/voice')

for i in range(len(video_names)):
    ffmpeg.video2voice(os.path.join(dataset, video_names[i]),
                       os.path.join('./tmp/voice', '%03d' % i + '.wav'),
                       samplingrate=44100)
    voice = sound.load(os.path.join('./tmp/voice', '%03d' % i + '.wav'))[1]
    base_freq = sound.basefreq(voice, 44100, 5000, mode='mean')
    print(video_names[i])
    print('basefreq:', base_freq)
    print('note:', librosa.hz_to_note(base_freq))
    f, fft = dsp.showfreq(voice, 44100, 5000)
    plt.plot(f, fft)
    plt.show()
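# librosa.hz_to_note maps a frequency in Hz to the nearest pitch name, which is
# what the loop above prints for each clip's estimated base frequency, e.g.:
import librosa
print(librosa.hz_to_note(440.0))   # expected 'A4'
print(librosa.hz_to_note(261.63))  # expected 'C4' (middle C, approximately)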
def cleanmosaic_video_fusion(opt):
    net = loadmodel.pix2pix(opt)
    net_mosaic_pos = loadmodel.unet_clean(opt)
    path = opt.media_path
    N = 25
    util.clean_tempfiles()
    fps = ffmpeg.get_video_infos(path)[0]
    ffmpeg.video2voice(path, './tmp/voice_tmp.mp3')
    ffmpeg.video2image(path, './tmp/video2image/output_%05d.' + opt.tempimage_type)
    positions = []
    imagepaths = os.listdir('./tmp/video2image')
    imagepaths.sort()

    # get position
    for imagepath in imagepaths:
        img_origin = impro.imread(os.path.join('./tmp/video2image', imagepath))
        # x,y,size = runmodel.get_mosaic_position(img_origin,net_mosaic_pos,opt)[:3]
        x, y, size, mask = runmodel.get_mosaic_position(img_origin, net_mosaic_pos, opt)
        cv2.imwrite(os.path.join('./tmp/mosaic_mask', imagepath), mask)
        positions.append([x, y, size])
        print('Find mosaic location:', imagepath)
    print('Optimize mosaic locations...')
    positions = np.array(positions)
    for i in range(3):
        positions[:, i] = filt.medfilt(positions[:, i], opt.medfilt_num)

    # clean mosaic
    print('Clean mosaic...')
    for i, imagepath in enumerate(imagepaths, 0):
        print('Clean mosaic:', imagepath)
        x, y, size = positions[i][0], positions[i][1], positions[i][2]
        img_origin = impro.imread(os.path.join('./tmp/video2image', imagepath))
        mask = cv2.imread(os.path.join('./tmp/mosaic_mask', imagepath), 0)
        if size == 0:
            cv2.imwrite(os.path.join('./tmp/replace_mosaic', imagepath), img_origin)
        else:
            # stack N neighbouring frames (3*N channels) plus the mask as network input
            mosaic_input = np.zeros((256, 256, 3 * N + 1), dtype='uint8')
            for j in range(0, N):
                img = impro.imread(os.path.join(
                    './tmp/video2image',
                    imagepaths[np.clip(i + j - 12, 0, len(imagepaths) - 1)]))
                img = img[y - size:y + size, x - size:x + size]
                img = impro.resize(img, 256)
                mosaic_input[:, :, j * 3:(j + 1) * 3] = img
            mask = impro.resize(mask, np.min(img_origin.shape[:2]))
            mask = mask[y - size:y + size, x - size:x + size]
            mask = impro.resize(mask, 256)
            mosaic_input[:, :, -1] = mask
            mosaic_input = data.im2tensor(mosaic_input, bgr2rgb=False,
                                          use_gpu=opt.use_gpu, use_transform=False)
            unmosaic_pred = net(mosaic_input)
            unmosaic_pred = (unmosaic_pred.cpu().detach().numpy() * 255)[0]
            img_fake = unmosaic_pred.transpose((1, 2, 0))
            img_result = impro.replace_mosaic(img_origin, img_fake, x, y, size, opt.no_feather)
            cv2.imwrite(os.path.join('./tmp/replace_mosaic', imagepath), img_result)
    ffmpeg.image2video(
        fps,
        './tmp/replace_mosaic/output_%05d.' + opt.tempimage_type,
        './tmp/voice_tmp.mp3',
        os.path.join(opt.result_dir, os.path.splitext(os.path.basename(path))[0] + '_clean.mp4'))
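# The fusion input above is a +/-12 frame window around frame i (N = 25),
# clamped at the clip boundaries. A standalone sketch of that index arithmetic
# with toy values:
import numpy as np

N, i, total = 25, 3, 100
window = [int(np.clip(i + j - 12, 0, total - 1)) for j in range(N)]
# near the clip start the window repeats frame 0: [0, 0, ..., 0, 1, 2, ..., 15]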
EnergyAlpha = 0.3
STEP = ['generate_video']  # 'preprocess','matchtest','generate_video' or 'full'
FPS = 24
CorrectFreq = False
CorrectEnergy = True
ShowPeak = True
SamplingRate = 44100
IntervalTime = 0.07

util.clean_tempfiles(tmp_init=True)

'''
---------------------------------Video Preprocess---------------------------------
'''
fps, endtime, height, width = ffmpeg.get_video_infos(videopath)
print(fps, endtime, height, width)
ffmpeg.video2voice(videopath, './tmp/video_tmp.wav', samplingrate=44100)
_, audio = sound.load('./tmp/video_tmp.wav', ch=0)
energy = dsp.energy(audio, 4410, 441, 4410)
indexs = arrop.findpeak(energy, interval=int(IntervalTime * 100))
reverse_indexs = arrop.findpeak(energy, interval=int(IntervalTime * 100 * 0.5), reverse=True)

# syllables = []
# for i in range(len(indexs)):
#     for j in range(len(reverse_indexs)-1):
#         if reverse_indexs[j] < indexs[i]:
#             if reverse_indexs[j+1] > indexs[i]:
#                 syllables.append(audio[reverse_indexs[j]*441:reverse_indexs[j+1]*441])
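# dsp.energy is project-local; its call pattern (signal, 4410, 441, 4410)
# suggests short-time energy over 4410-sample (100 ms) frames with a
# 441-sample (10 ms) hop at 44.1 kHz. A numpy stand-in under that assumption:
import numpy as np

def short_time_energy(x, frame=4410, hop=441):
    n = 1 + max(0, (len(x) - frame) // hop)
    return np.array([np.sum(x[k * hop:k * hop + frame] ** 2) for k in range(n)])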
def main():
    if opt.mode == 'add':
        net = loadmodel.unet(opt)
        path = opt.media_path
        if util.is_img(path):
            print('Add Mosaic:', path)
            img = impro.imread(path)
            mask = runmodel.get_ROI_position(img, net, opt)[0]
            img = mosaic.addmosaic(img, mask, opt)
            cv2.imwrite(os.path.join(opt.result_dir, os.path.basename(path)), img)
        elif util.is_video(path):
            util.clean_tempfiles()
            fps = ffmpeg.get_video_infos(path)[0]
            ffmpeg.video2voice(path, './tmp/voice_tmp.mp3')
            ffmpeg.video2image(path, './tmp/video2image/output_%05d.' + opt.tempimage_type)
            imagepaths = os.listdir('./tmp/video2image')
            imagepaths.sort()

            # get position
            positions = []
            for imagepath in imagepaths:
                imagepath = os.path.join('./tmp/video2image', imagepath)
                print('Find ROI location:', imagepath)
                img = impro.imread(imagepath)
                mask, x, y, area = runmodel.get_ROI_position(img, net, opt)
                positions.append([x, y, area])
                cv2.imwrite(os.path.join('./tmp/ROI_mask', os.path.basename(imagepath)), mask)
            print('Optimize ROI locations...')
            mask_index = filt.position_medfilt(np.array(positions), 7)

            # add mosaic
            print('Add mosaic to images...')
            for i in range(len(imagepaths)):
                mask_path = os.path.join('./tmp/ROI_mask', imagepaths[mask_index[i]])
                mask = impro.imread(mask_path)
                img = impro.imread(os.path.join('./tmp/video2image', imagepaths[i]))
                img = mosaic.addmosaic(img, mask, opt)
                cv2.imwrite(os.path.join('./tmp/addmosaic_image',
                                         os.path.basename(imagepaths[i])), img)
            ffmpeg.image2video(
                fps,
                './tmp/addmosaic_image/output_%05d.' + opt.tempimage_type,
                './tmp/voice_tmp.mp3',
                os.path.join(opt.result_dir,
                             os.path.splitext(os.path.basename(path))[0] + '_add.mp4'))

    elif opt.mode == 'clean':
        netG = loadmodel.pix2pix(opt)
        net_mosaic_pos = loadmodel.unet_clean(opt)
        path = opt.media_path
        if util.is_img(path):
            print('Clean Mosaic:', path)
            img_origin = impro.imread(path)
            x, y, size = runmodel.get_mosaic_position(img_origin, net_mosaic_pos, opt)
            img_result = img_origin.copy()
            if size != 0:
                img_mosaic = img_origin[y - size:y + size, x - size:x + size]
                img_fake = runmodel.run_pix2pix(img_mosaic, netG, opt)
                img_result = impro.replace_mosaic(img_origin, img_fake, x, y, size, opt.no_feather)
            cv2.imwrite(os.path.join(opt.result_dir, os.path.basename(path)), img_result)
        elif util.is_video(path):
            util.clean_tempfiles()
            fps = ffmpeg.get_video_infos(path)[0]
            ffmpeg.video2voice(path, './tmp/voice_tmp.mp3')
            ffmpeg.video2image(path, './tmp/video2image/output_%05d.' + opt.tempimage_type)
            positions = []
            imagepaths = os.listdir('./tmp/video2image')
            imagepaths.sort()

            # get position
            for imagepath in imagepaths:
                imagepath = os.path.join('./tmp/video2image', imagepath)
                img_origin = impro.imread(imagepath)
                x, y, size = runmodel.get_mosaic_position(img_origin, net_mosaic_pos, opt)
                positions.append([x, y, size])
                print('Find mosaic location:', imagepath)
            print('Optimize mosaic locations...')
            positions = np.array(positions)
            for i in range(3):
                positions[:, i] = filt.medfilt(positions[:, i], opt.medfilt_num)

            # clean mosaic
            for i, imagepath in enumerate(imagepaths, 0):
                imagepath = os.path.join('./tmp/video2image', imagepath)
                x, y, size = positions[i][0], positions[i][1], positions[i][2]
                img_origin = impro.imread(imagepath)
                img_result = img_origin.copy()
                if size != 0:
                    img_mosaic = img_origin[y - size:y + size, x - size:x + size]
                    img_fake = runmodel.run_pix2pix(img_mosaic, netG, opt)
                    img_result = impro.replace_mosaic(img_origin, img_fake, x, y, size, opt.no_feather)
                cv2.imwrite(os.path.join('./tmp/replace_mosaic', os.path.basename(imagepath)),
                            img_result)
                print('Clean Mosaic:', imagepath)
            ffmpeg.image2video(
                fps,
                './tmp/replace_mosaic/output_%05d.' + opt.tempimage_type,
                './tmp/voice_tmp.mp3',
                os.path.join(opt.result_dir,
                             os.path.splitext(os.path.basename(path))[0] + '_clean.mp4'))

    util.clean_tempfiles(tmp_init=False)
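# A hypothetical invocation sketch: opt is parsed from the command line, and
# the flag names below are inferred from the opt fields main() reads (mode,
# media_path, result_dir); the actual parser and script name may differ.
#   python deepmosaic.py --mode add   --media_path ./video.mp4 --result_dir ./result
#   python deepmosaic.py --mode clean --media_path ./video.mp4 --result_dir ./result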
img_scale = h_media / w_media / opt.char_scale
if img_scale >= screen_scale:
    strshape = (limh, int(limh / img_scale))
else:
    strshape = (int(limw * img_scale), limw)

# -------------------------------img2shell Init-------------------------------
transformer = Transformer(strshape, (limh, limw), opt.charstyle)
if util.is_video(opt.media):
    recommend_fps = transformer.eval_performance(opt.gray)
    if opt.fps == 0:
        opt.fps = np.clip(recommend_fps, 1, opt.ori_fps)
    else:
        opt.fps = np.clip(opt.fps, 1, opt.ori_fps)
    if system_type == 'Linux':
        ffmpeg.video2voice(opt.media, '-ar 16000 ./tmp/tmp.wav')

# -------------------------------main-------------------------------
if __name__ == '__main__':
    if system_type == 'Windows':
        multiprocessing.freeze_support()
    if util.is_img(opt.media):
        print(transformer.convert(img, opt.gray))
    elif util.is_video(opt.media):
        imgQueue = Queue(1)
        timerQueue = Queue()
        imgload_p = Process(target=readvideo, args=(opt, imgQueue))
        imgload_p.daemon = True
        imgload_p.start()
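# Worked example of the character-grid shape math above (all values
# hypothetical, and assuming screen_scale = limh / limw): a 1080x1920 video
# with char_scale 2.0 gives img_scale ~ 0.281, which is below an 80x240
# terminal's screen_scale of ~0.333, so the grid is fit to the terminal width:
h_media, w_media, char_scale = 1080, 1920, 2.0
limh, limw = 80, 240
img_scale = h_media / w_media / char_scale  # ~0.281
strshape = (int(limw * img_scale), limw)    # (67, 240) rows x cols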