def main(DIRECTORY, args): print("[Start] making audio for the training.") TR_SPEECH_PATH = DIRECTORY['TR_SPEECH_PATH'] TR_NOISE_PATH = DIRECTORY['TR_NOISE_PATH'] OUTPUT_PATH = DIRECTORY['OUTPUT_PATH'] # set output path merged_output_path = os.path.join(OUTPUT_PATH,'merged_audio') # make output path makedirs(merged_output_path) # make merged speech audio using librosa speech_merged_audio,rate = get_merged_audio(TR_SPEECH_PATH, audio_type='speech', args=args) # make merged noise audio using librosa noise_merged_audio,rate = get_merged_audio(TR_NOISE_PATH, audio_type='noise', args=args ) # make RMS of speech and noise equal noise_merged_audio = equalizingRMS(speech_merged_audio, noise_merged_audio) # write wav files write_audio(merged_output_path+'/merged_train_speech_audio.wav', speech_merged_audio, args.sampling_rate) write_audio(merged_output_path+'/merged_train_noise_audio.wav', noise_merged_audio, args.sampling_rate) print("[Finish] making audio for the training.")
def run_openpose(args, video_path):
    video_idx = path.basename(video_path).split('.')[0]
    try:
        img_dir = path.join(args.output_root, args.img_folder, video_idx)
        op_dir = path.join(args.output_root, args.openpose_folder, video_idx)
        img_names = sorted(glob.glob(img_dir + '/*.jpg'))
        op_names = sorted(glob.glob(op_dir + '/*.json'))
        # Run only if the frames haven't been extracted yet, or OpenPose hasn't
        # been run or hasn't finished processing.
        if (not os.path.isdir(img_dir) or not os.path.isdir(op_dir)
                or len(img_names) != len(op_names)):
            makedirs(img_dir)
            # First run OpenPose on key frames only, then decide whether to run
            # the whole batch of frames.
            extract_key_frames(args, video_path, img_dir)
            run_openpose_cmd(args, video_idx)
            # If the key frames look good, extract all frames in the batch and
            # run OpenPose on them.
            if args.n_skip_frames > 1:
                extract_valid_frames(args, video_path, img_dir)
                run_openpose_cmd(args, video_idx)
            # Remove all unusable frames.
            remove_invalid_frames(args, video_idx)
            remove_static_frames(args, video_idx)
            remove_isolated_frames(args, video_idx)
    except Exception as e:
        # Chain the original exception so the root cause is not lost.
        raise ValueError('video %s running openpose failed' % video_idx) from e
def process_video(videopath, savedir, min_interval_time=0.1, crop_mode='peak',
                  crop_time=0.2, rate=44100, fc=[20, 8000], saveimage=True):
    """Extract audio (and optionally frames) from a video and segment the audio.

    videopath:          input video file
    savedir:            directory for intermediate and output files
    min_interval_time:  minimum time between detected peaks, in seconds
    crop_mode:          'peak' | 'time'
    crop_time:          crop length in seconds when crop_mode == 'time'
    rate:               audio sampling rate
    fc:                 band-pass cut-off frequencies [low, high] in Hz
    saveimage:          if True, also dump every video frame as a png
    return:
        audio, syllables, features, peakindexs, bias,
        video_infos (fps, endtime, height, width)
    """
    util.makedirs(savedir)
    # process video
    video_infos = ffmpeg.get_video_infos(videopath)
    ffmpeg.video2voice(videopath, os.path.join(savedir, 'video_tmp.wav'), samplingrate=rate)
    if saveimage:
        util.makedirs(os.path.join(savedir, 'imgs'))
        ffmpeg.video2image(videopath, os.path.join(savedir, 'imgs', '%05d.png'))
    # process audio
    audio, syllables, features, peakindexs, bias = process_audio(
        os.path.join(savedir, 'video_tmp.wav'), savedir,
        min_interval_time, crop_mode, crop_time, rate, fc)
    np.save(os.path.join(savedir, 'video_infos.npy'), np.array(video_infos))
    return audio, syllables, features, peakindexs, bias, video_infos
def process_audio(audiopath, savedir, min_interval_time=0.1, crop_mode='peak',
                  crop_time=0.2, rate=44100, fc=[20, 8000], hpss=''):
    util.makedirs(savedir)
    # convert to wav if needed
    if (os.path.splitext(audiopath)[1]).lower() != '.wav':
        ffmpeg.video2voice(audiopath, os.path.join(savedir, 'video_tmp.wav'), samplingrate=rate)
        audiopath = os.path.join(savedir, 'video_tmp.wav')
    _, audio = sound.load(audiopath, ch=0)
    _audio = audio.copy()

    # energy envelope, optionally on the harmonic or percussive component only
    # (frame = 4410 samples = 100 ms, hop = 441 samples = 10 ms at 44.1 kHz)
    if hpss == 'harmonic':
        harmonic, percussive = librosa.effects.hpss(_audio)
        energy = dsp.energy(sound.filter(harmonic, fc, rate), 4410, 441, 4410)
    elif hpss == 'percussive':
        harmonic, percussive = librosa.effects.hpss(_audio)
        energy = dsp.energy(sound.filter(percussive, fc, rate), 4410, 441, 4410)
    else:
        energy = dsp.energy(sound.filter(_audio, fc, rate), 4410, 441, 4410)

    # pick peaks on the 100 Hz energy envelope
    peakindexs = arrop.findpeak(energy, interval=int(min_interval_time * 100))
    y = arrop.get_y(peakindexs, energy)
    plt.plot(energy)
    plt.scatter(peakindexs, y, c='orange')
    plt.show()
    # map envelope indices back to sample indices (hop = 441 samples)
    peakindexs = peakindexs * 441

    # decide how much audio to crop around each peak
    bias = []
    if crop_mode == 'peak':
        valleyindexs = arrop.findpeak(energy, interval=int(min_interval_time * 100),
                                      reverse=True) * 441
        for i in range(len(peakindexs)):
            for j in range(len(valleyindexs) - 1):
                if valleyindexs[j] < peakindexs[i] < valleyindexs[j + 1]:
                    left = np.clip(peakindexs[i] - valleyindexs[j],
                                   int(min_interval_time * rate * 0.5),
                                   int(min_interval_time * rate * 5))
                    right = np.clip(valleyindexs[j + 1] - peakindexs[i],
                                    int(min_interval_time * rate * 0.5),
                                    int(min_interval_time * rate * 5))
                    bias.append([left, right])
    elif crop_mode == 'time':
        for i in range(len(peakindexs)):
            bias.append([int(rate * crop_time / 2), int(rate * crop_time / 2)])

    # crop a syllable around each peak and compute its frequency features
    syllables = []
    features = []
    for i in range(len(peakindexs)):
        syllable = audio[peakindexs[i] - bias[i][0]:peakindexs[i] + bias[i][1]]
        syllables.append(syllable)
        features.append(sound.freqfeatures(syllable, rate))

    # save intermediate results
    np.save(os.path.join(savedir, 'peakindexs.npy'), np.array(peakindexs))
    np.save(os.path.join(savedir, 'bias.npy'), np.array(bias))
    np.save(os.path.join(savedir, 'syllables.npy'), np.array(syllables))
    np.save(os.path.join(savedir, 'features.npy'), np.array(features))
    # for syllable in syllables:
    #     sound.playtest(syllable)
    return audio, syllables, features, peakindexs, bias
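# A minimal standalone sketch (the helper name simple_findpeak is hypothetical;
# the project's arrop/dsp modules may behave differently) of the index mapping
# used above: the energy envelope is computed with a 441-sample hop, so envelope
# index k corresponds to sample index k * 441 in the 44.1 kHz audio.
import numpy as np

def simple_findpeak(envelope, interval):
    """A point is a peak if it is the maximum within +/- interval envelope frames."""
    peaks = []
    for k in range(interval, len(envelope) - interval):
        window = envelope[k - interval:k + interval + 1]
        if envelope[k] == window.max() and envelope[k] > 0:
            peaks.append(k)
    return np.array(peaks)

envelope = np.abs(np.sin(np.linspace(0, 20, 2000)))  # toy energy envelope
peaks = simple_findpeak(envelope, interval=10)       # interval = 0.1 s * 100 Hz
sample_positions = peaks * 441                       # back to audio sample indices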
def run_densepose(args, video_idx):
    try:
        img_dir = path.join(args.output_root, args.img_folder, video_idx)
        dp_dir = path.join(args.output_root, args.densepose_folder, video_idx)
        img_names = sorted(glob.glob(img_dir + '/*.jpg'))
        dp_names = sorted(glob.glob(dp_dir + '/*.png'))
        # Run only if DensePose hasn't been run or hasn't finished processing.
        if not os.path.isdir(dp_dir) or len(img_names) != len(dp_names):
            makedirs(dp_dir)
            run_densepose_cmd(args, video_idx)
    except Exception as e:
        # Chain the original exception so the root cause is not lost.
        raise ValueError('video %s running densepose failed' % video_idx) from e
def extract_all_frames(args, video_path):
    video_idx = path.basename(video_path).split('.')[0]
    img_dir = path.join(args.output_root, args.img_folder, video_idx)
    makedirs(img_dir)
    vidcap = cv2.VideoCapture(video_path)
    success, image = vidcap.read()
    frame_count = 0
    while success:
        write_name = path.join(img_dir, "frame%06d.jpg" % frame_count)
        cv2.imwrite(write_name, image)
        success, image = vidcap.read()
        frame_count += 1
    print('Extracted %d frames' % frame_count)
def run_openpose_cmd(args, video_idx):
    pwd = os.getcwd()
    img_dir = path.join(pwd, args.output_root, args.img_folder, video_idx)
    op_dir = path.join(pwd, args.output_root, args.openpose_folder, video_idx)
    render_dir = path.join(pwd, args.output_root,
                           args.openpose_folder + '_rendered', video_idx)
    makedirs(op_dir)
    makedirs(render_dir)
    cmd = 'cd %s; ./build/examples/openpose/openpose.bin --display 0 ' \
          '--disable_blending --image_dir %s --write_images %s --face --hand ' \
          '--face_render_threshold 0.1 --hand_render_threshold 0.02 ' \
          '--write_json %s; cd %s' \
          % (args.openpose_root, img_dir, render_dir, op_dir,
             path.join(pwd, args.output_root))
    os.system(cmd)
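# An alternative sketch (not the original code): the same OpenPose invocation
# via subprocess.run, which avoids shell string interpolation and surfaces a
# non-zero exit code. The flags are copied from run_openpose_cmd above.
import subprocess

def run_openpose_cmd_subprocess(args, img_dir, render_dir, op_dir):
    subprocess.run(
        ['./build/examples/openpose/openpose.bin', '--display', '0',
         '--disable_blending', '--image_dir', img_dir,
         '--write_images', render_dir, '--face', '--hand',
         '--face_render_threshold', '0.1', '--hand_render_threshold', '0.02',
         '--write_json', op_dir],
        cwd=args.openpose_root, check=True)  # raises CalledProcessError on failure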
def handlepost():
    if request.form['token'] != key:
        return {'return': 'token error'}

    if request.form['mode'] == 'clean':
        if os.path.isdir(opt.rec_tmp):
            shutil.rmtree(opt.rec_tmp)
        return {'return': 'done'}

    if request.form['mode'] == 'send':
        data = request.form['data']
        util.makedirs(os.path.join(opt.rec_tmp, request.form['label']))
        util.savetxt(data, os.path.join(opt.rec_tmp, request.form['label'],
                                        util.randomstr(8)))
        return {'return': 'done'}

    if request.form['mode'] == 'train':
        train(opt)
        label_map = {}
        for i in range(len(categorys)):
            label_map[categorys[i]] = i
        file = util.loadfile(os.path.join(opt.save_dir, 'model.pt'))
        file = base64.b64encode(file).decode('utf-8')
        heatmap = util.loadfile(os.path.join(opt.save_dir, 'final_heatmap.png'))
        heatmap = base64.b64encode(heatmap).decode('utf-8')
        return {
            'return': 'done',
            'report': 'macro-prec,reca,F1,err,kappa:'
                      + str(statistics.report(core.confusion_mats[-1])),
            'label_map': json.dumps(label_map),
            'heatmap': heatmap,
            'network': file
        }

    return {'return': 'error'}
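# A minimal wiring sketch (assumed, not shown in the original snippet): how a
# handler like handlepost() is typically registered on a Flask app. The route
# path '/' and the port are placeholders.
from flask import Flask, request

app = Flask(__name__)

@app.route('/', methods=['POST'])
def handle():
    return handlepost()

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=4000)  # hypothetical port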
def main(DIRECTORY, args): print("[Start] making audio for the test.") TE_SPEECH_PATH = DIRECTORY['TE_SPEECH_PATH'] TE_NOISE_PATH = DIRECTORY['TE_NOISE_PATH'] OUTPUT_PATH = DIRECTORY['OUTPUT_PATH'] # set output path output_path = os.path.join(OUTPUT_PATH, 'test_noisy_audio') # make output path makedirs(output_path) speech_names = [ na for na in os.listdir(TE_SPEECH_PATH) if na.lower().endswith(".wav") ] noise_names = [ na for na in os.listdir(TE_NOISE_PATH) if na.lower().endswith(".wav") ] for speech_na in speech_names: speech_path = os.path.join(TE_SPEECH_PATH, speech_na) speech, _ = read_audio(speech_path, target_fs=args.sampling_rate) for noise_na in noise_names: noise_path = os.path.join(TE_NOISE_PATH, noise_na) noise, _ = read_audio(noise_path, target_fs=args.sampling_rate) noise = equalizingRMS(ref=speech, target=noise) noisy = addNoise(speech=speech, noise=noise, snr=0) noisy_na = os.path.join("%s_%s.wav" % (os.path.splitext(speech_na)[0], os.path.splitext(noise_na)[0])) noisy_path = os.path.join(output_path, noisy_na) write_audio(noisy_path, noisy, args.sampling_rate) print("[Finish] making audio for the test.")
import os
import sys
sys.path.append("..")
from cores import Options
from util import util, ffmpeg

opt = Options()
opt.parser.add_argument('--datadir', type=str, default='', help='your video dir')
opt.parser.add_argument('--savedir', type=str, default='../datasets/video2image', help='')
opt = opt.getparse()

files = util.Traversal(opt.datadir)
videos = util.is_videos(files)
util.makedirs(opt.savedir)

for video in videos:
    ffmpeg.continuous_screenshot(video, opt.savedir, opt.fps)
opt.parser.add_argument('--perload_num', type=int, default=16, help='')
opt.parser.add_argument('--norm', type=str, default='instance', help='')
opt.parser.add_argument('--maxiter', type=int, default=10000000, help='maximum number of training iterations')
opt.parser.add_argument('--savefreq', type=int, default=10000, help='save the checkpoint every N iterations')
opt.parser.add_argument('--startiter', type=int, default=0, help='iteration to start (resume) from')
opt.parser.add_argument('--continuetrain', action='store_true', help='continue training from the last checkpoint')
opt.parser.add_argument('--savename', type=str, default='MosaicNet', help='name of the checkpoint directory')

'''
--------------------------Init--------------------------
'''
opt = opt.getparse()
dir_checkpoint = os.path.join('checkpoints/', opt.savename)
util.makedirs(dir_checkpoint)
util.writelog(os.path.join(dir_checkpoint, 'loss.txt'),
              str(time.asctime(time.localtime(time.time()))) + '\n' + util.opt2str(opt))

N = opt.N
loss_sum = [0., 0., 0., 0.]
loss_plot = [[], []]
item_plot = []

videos = os.listdir('./dataset')
videos.sort()
lengths = []
print('check dataset...')
for video in videos:
    video_images = os.listdir('./dataset/' + video + '/ori')
    lengths.append(len(video_images))
from util import image_processing as impro

opt = Options()
opt.parser.add_argument('--datadir', type=str, default=' ', help='your images dir')
opt.parser.add_argument('--savedir', type=str, default='../datasets/draw/face', help='')
opt = opt.getparse()

mask_savedir = os.path.join(opt.savedir, 'mask')
img_savedir = os.path.join(opt.savedir, 'origin_image')
util.makedirs(mask_savedir)
util.makedirs(img_savedir)

filepaths = util.Traversal(opt.datadir)
filepaths = util.is_imgs(filepaths)
random.shuffle(filepaths)
print('find image:', len(filepaths))

# mouse callback state
drawing = False  # true if mouse is pressed
ix, iy = -1, -1
brushsize = 20

def draw_circle(event, x, y, flags, param):
    global ix, iy, drawing, brushsize
import numpy as np
import cv2
import os
import sys
sys.path.append("..")
from util import image_processing as impro
from util import util

img_dir = './datasets_img/pix2pix/edges2cat/images'
output_dir = './datasets_img/pix2pix/edges2cat/train'
util.makedirs(output_dir)

img_names = os.listdir(img_dir)
for i, img_name in enumerate(img_names, 2000):
    try:
        img = impro.imread(os.path.join(img_dir, img_name))
        img = impro.resize(img, 286)
        h, w = img.shape[:2]
        edges = cv2.Canny(img, 150, 250)
        edges = impro.ch_one2three(edges)
        # pix2pix pair: edges on the left, the original image on the right
        out_img = np.zeros((h, w * 2, 3), dtype=np.uint8)
        out_img[:, 0:w] = edges
        out_img[:, w:2 * w] = img
        cv2.imwrite(os.path.join(output_dir, '%05d' % i + '.jpg'), out_img)
    except Exception as e:
        # report unreadable images instead of failing silently
        print('skip %s: %s' % (img_name, e))
def main(DIRECTORY, args):
    OUTPUT_PATH = DIRECTORY['OUTPUT_PATH']
    hop_size = int(args.win_size - args.overlap)  # hop size for the spectrogram

    print("[Start] training.. NMF algorithm.")
    # ------------------- train ------------------
    target_dir = os.path.join(OUTPUT_PATH, 'merged_audio')
    speech_path = os.path.join(target_dir, "merged_train_speech_audio.wav")
    noise_path = os.path.join(target_dir, "merged_train_noise_audio.wav")

    # magnitude spectrograms
    V_speech = abs(audio_to_spectrogram(speech_path, args.sampling_rate,
                                        args.window, args.fft, hop_size))
    V_noise = abs(audio_to_spectrogram(noise_path, args.sampling_rate,
                                       args.window, args.fft, hop_size))

    # run NMF (V ~= WH) on speech and noise jointly
    V = np.concatenate((V_speech, V_noise), axis=1)
    num_basis_train = args.num_basis_speech + args.num_basis_noise
    W_train, H_train = nmf_train(V, args.max_iter_train, args.epsilon, num_basis_train)
    print("[End] training.. NMF algorithm.")

    # ------------------- test -------------------
    print("[Start] Test.. NMF algorithm (output: enhanced speech).")
    test_dir = os.path.join(OUTPUT_PATH, 'test_noisy_audio')
    noisy_names = [na for na in os.listdir(test_dir) if na.lower().endswith(".wav")]
    output_path = os.path.join(OUTPUT_PATH, 'enhanced_audio', str(args.nmf_mode))
    makedirs(output_path)

    for noisy_na in noisy_names:
        print("%s" % (noisy_na))
        noisy_path = os.path.join(test_dir, noisy_na)
        # complex spectrogram of the noisy signal
        stft_noisy = audio_to_spectrogram(noisy_path, args.sampling_rate,
                                          args.window, args.fft, hop_size)
        V_noisy = abs(stft_noisy)  # magnitude spectrogram of the noisy signal
        # new encoding matrix obtained by NMF with the trained bases fixed
        H_noisy = nmf_test(V_noisy, W_train, H_train, args.max_iter_test,
                           args.epsilon, penalty=args.penalty, algorithm=args.nmf_mode)
        # Wiener filtering yields the enhanced magnitude spectrogram
        enhanced_V = wienner_filtering(V_noisy, W_train, H_noisy,
                                       args.num_basis_speech, args.p)
        # reconstruct audio with the noisy phase
        reconstructed_audio = spectrogram_to_audio(enhanced_V, np.angle(stft_noisy),
                                                   args.window, hop_size)
        # write the enhanced audio
        out_audio_path = os.path.join(output_path, "enhanced_%s" % noisy_na)
        write_audio(out_audio_path, reconstructed_audio, args.sampling_rate)

        # ------------------- plot -------------------
        if args.visualize:
            # spectrogram of the corresponding clean speech
            clean_speech_na = ("_").join(noisy_na.split("_")[0:-1]) + os.path.splitext(noisy_na)[-1]
            clean_speech_path = os.path.join(DIRECTORY['TE_SPEECH_PATH'], clean_speech_na)
            stft_clean = audio_to_spectrogram(clean_speech_path, args.sampling_rate,
                                              args.window, args.fft, hop_size)
            # visualize noisy / clean / enhanced log spectrograms
            fig, axs = plt.subplots(3, 1, sharex=False)
            axs[0].matshow(np.log10(np.abs(stft_noisy) ** 2), origin='lower', aspect='auto', cmap='jet')
            axs[1].matshow(np.log10(np.abs(stft_clean) ** 2), origin='lower', aspect='auto', cmap='jet')
            axs[2].matshow(np.log10(enhanced_V ** 2), origin='lower', aspect='auto', cmap='jet')
            axs[0].set_title("0 dB mixture log spectrogram (%s)" % noisy_na)
            axs[1].set_title("Clean speech log spectrogram")
            axs[2].set_title("Enhanced speech log spectrogram")
            for j1 in range(3):
                axs[j1].xaxis.tick_bottom()
            plt.tight_layout()
            fig = plt.gcf()
            plt.show()
            if args.plot_save:
                plot_path = os.path.join(OUTPUT_PATH, "plot", str(args.nmf_mode))
                makedirs(plot_path)
                seg_result_path = os.path.join(plot_path, "%s.png" % (os.path.splitext(noisy_na)[0]))
                fig.savefig(seg_result_path, dpi=300)
    print("[End] Test.. NMF algorithm (output: enhanced speech).")
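# A minimal numpy sketch (wiener_mask is a hypothetical name; the project's
# wienner_filtering may differ in detail) of the Wiener-style mask implied
# above: with the learned bases split into speech and noise parts, the speech
# estimate is V * S^p / (S^p + N^p), where S = W_s H_s and N = W_n H_n.
import numpy as np

def wiener_mask(V_noisy, W, H, num_basis_speech, p=2, eps=1e-12):
    S = W[:, :num_basis_speech] @ H[:num_basis_speech, :]   # speech model
    N = W[:, num_basis_speech:] @ H[num_basis_speech:, :]   # noise model
    mask = S ** p / (S ** p + N ** p + eps)                 # soft mask in [0, 1]
    return V_noisy * mask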
import sys
sys.path.append("..")
from util import util, transformer, dataloader, statistics, plot, options
from util import array_operation as arr
from models import creatnet, core

# ----------------------------- Init -----------------------------
opt = options.Options()
opt.parser.add_argument('--rec_tmp', type=str, default='./server_data/rec_data', help='')
opt = opt.getparse()
opt.k_fold = 0
opt.save_dir = './checkpoints'
util.makedirs(opt.save_dir)
util.makedirs(opt.rec_tmp)

# ----------------------------- Load original data -----------------------------
signals, labels = dataloader.loaddataset(opt)
ori_signals_train, ori_labels_train, ori_signals_eval, ori_labels_eval = \
    signals[:opt.fold_index[0]].copy(), labels[:opt.fold_index[0]].copy(), \
    signals[opt.fold_index[0]:].copy(), labels[opt.fold_index[0]:].copy()
label_cnt, label_cnt_per, label_num = statistics.label_statistics(labels)
opt = options.get_auto_options(opt, label_cnt_per, label_num, ori_signals_train)

# ----------------------------- def network -----------------------------
core = core.Core(opt)
core.network_init(printflag=True)
import os
import numpy as np
from scipy.io import wavfile
import matplotlib.pylab as plt
import random
import scipy.signal
import time
import librosa

from util import util, ffmpeg, dsp, sound, notation

dataset = './dataset/诸葛亮'
video_names = os.listdir(dataset)
video_names.sort()
util.clean_tempfiles(tmp_init=False)
util.makedirs('./tmp/voice')

for i in range(len(video_names)):
    ffmpeg.video2voice(os.path.join(dataset, video_names[i]),
                       os.path.join('./tmp/voice', '%03d' % i + '.wav'),
                       samplingrate=44100)
    voice = sound.load(os.path.join('./tmp/voice', '%03d' % i + '.wav'))[1]
    # estimate the fundamental frequency and the nearest musical note
    base_freq = sound.basefreq(voice, 44100, 5000, mode='mean')
    print(video_names[i])
    print('basefreq:', base_freq)
    print('note:', librosa.hz_to_note(base_freq))
    f, fft = dsp.showfreq(voice, 44100, 5000)
    plt.plot(f, fft)
    plt.show()
import cv2
import numpy as np
import datetime
import os
import random
import sys
sys.path.append("..")
from util import util
from util import image_processing as impro

image_dir = './datasets_img/v2im'
mask_dir = './datasets_img/v2im_mask'
util.makedirs(mask_dir)

files = os.listdir(image_dir)
files_new = files.copy()
print('find image:', len(files))
masks = os.listdir(mask_dir)
print('mask:', len(masks))

# mouse callback state
drawing = False  # true if mouse is pressed
ix, iy = -1, -1
brushsize = 20

def draw_circle(event, x, y, flags, param):
    global ix, iy, drawing, brushsize
opt = Options().getparse()
system_type = 'Linux'
if 'Windows' in platform.platform():
    system_type = 'Windows'

# ------------------------------- Media Init -------------------------------
if util.is_img(opt.media):
    img = cv2.imread(opt.media)
    h_media, w_media = img.shape[:2]
elif util.is_video(opt.media):
    fps, endtime, h_media, w_media = ffmpeg.get_video_infos(opt.media)
    if opt.frame_num == 0:
        opt.frame_num = int(endtime * fps - 5)
    if opt.ori_fps == 0:
        opt.ori_fps = fps
    util.makedirs('./tmp')
else:
    print('Can not load this file!')

# ------------------------------- Image Shape Init -------------------------------
# terminal sizes: 80x24, 132x43, or 203x55 characters
if opt.screen == 1:
    limw = 80; limh = 24
if opt.screen == 2:
    limw = 132; limh = 43
if opt.screen == 3:
    limw = 203; limh = 55
screen_scale = limh / limw
img_scale = h_media / w_media / opt.char_scale
if img_scale >= screen_scale:
    strshape = (limh, int(limh / img_scale))
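# A worked example of the shape arithmetic above (the numbers are illustrative,
# and the width-limited branch is an assumption since the else-branch of the
# snippet is not shown here):
h_media, w_media, char_scale = 1080, 1920, 2
limw, limh = 80, 24                          # screen == 1
screen_scale = limh / limw                   # 0.3
img_scale = h_media / w_media / char_scale   # 0.28125
if img_scale >= screen_scale:
    strshape = (limh, int(limh / img_scale))     # height-limited
else:
    strshape = (int(limw * img_scale), limw)     # width-limited (assumed)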
opt.parser.add_argument('--time', type=int, default=5, help='split video time')
opt.parser.add_argument('--minmaskarea', type=int, default=2000, help='')
opt.parser.add_argument('--quality', type=int, default=45, help='minimal quality')
opt.parser.add_argument('--outsize', type=int, default=286, help='')
opt.parser.add_argument('--startcnt', type=int, default=0, help='')
opt.parser.add_argument('--minsize', type=int, default=96, help='minimal roi size')
opt.parser.add_argument('--no_sclectscene', action='store_true', help='')
opt = opt.getparse()

util.makedirs(opt.savedir)
util.writelog(os.path.join(opt.savedir, 'opt.txt'),
              str(time.asctime(time.localtime(time.time()))) + '\n' + util.opt2str(opt))

videopaths = util.Traversal(opt.datadir)
videopaths = util.is_videos(videopaths)
random.shuffle(videopaths)

# def network
net = loadmodel.bisenet(opt, 'roi')

result_cnt = opt.startcnt
video_cnt = 1
starttime = datetime.datetime.now()
for videopath in videopaths:
HypoX64 ASCII-art banner (layout lost in extraction)
'''
# ------------------------------------------------------------------- HypoX64
print('Init...')
notations = notation.readscore('./music/CroatianRhapsody.txt')
dataset = './dataset/诸葛亮'
video_names = os.listdir(dataset)
video_names.sort()
util.clean_tempfiles(tmp_init=False)
util.makedirs('./tmp/voice')

# build a bank of seed voices and their base frequencies
seed_voices = []
seed_freqs = []
for i in range(len(video_names)):
    ffmpeg.video2voice(os.path.join(dataset, video_names[i]),
                       os.path.join('./tmp/voice', '%03d' % i + '.wav'),
                       samplingrate=44100)
    voice = sound.load(os.path.join('./tmp/voice', '%03d' % i + '.wav'))[1]
    # voice = dsp.bpf(voice, 44100, 20, 2000)
    base_freq = sound.basefreq(voice, 44100, 4000, mode='mean')
    seed_voices.append(voice)
    seed_freqs.append(base_freq)
    fps, endtime, height, width = ffmpeg.get_video_infos(
        os.path.join(dataset, video_names[i]))
                                          os.path.join(send_data_dir, labels[i], samples[j]))
        data = {
            'token': opt.token,
            'mode': 'send',
            'label': labels[i],
            'data': txt_data
        }
        r = requests.post(opt.url, data)
        print(r.json())

"""Train and get the network weights
return: {
    'return'   : 'done',                         # txt
    'report'   : 'macro-prec,reca,F1,err,kappa:' + str(statistics.report(core.confusion_mats[-1])),  # txt
    'label_map': {'user_nameA':0,'user_nameB':1,'user_nameC':2},  # json
    'heatmap'  : heatmap,                        # .png file, encoded by base64
    'network'  : file                            # .pth file, encoded by base64
}
"""
data = {'token': opt.token, 'mode': 'train'}
r = requests.post(opt.url, data, timeout=60)
rec_data = r.json()
print('report:', rec_data['report'])
print('label_map:', rec_data['label_map'])

# save model.pt
util.makedirs('./client_data')
file = base64.b64decode(rec_data['network'])
util.savefile(file, './client_data/model.pt')
# save heatmap.png
file = base64.b64decode(rec_data['heatmap'])
util.savefile(file, './client_data/heatmap.png')
    sys.exit(0)

opt = Options().getparse(test_flag=False)
if not os.path.isdir(opt.temp_dir):
    util.file_init(opt)

def saveScriptModel(model, example, savepath):
    model.cpu()
    # trace the model and sanity-check the ScriptModule on the example input
    traced_script_module = torch.jit.trace(model, example)
    output = traced_script_module(example)
    print(output)
    traced_script_module.save(savepath)

savedir = '../cpp/res/models/'
util.makedirs(savedir)

opt.mosaic_position_model_path = '../pretrained_models/mosaic/mosaic_position.pth'
model = loadmodel.bisenet(opt, 'mosaic')
example = torch.ones((1, 3, 360, 360))
saveScriptModel(model, example, os.path.join(savedir, 'mosaic_position.pt'))

# def main():
#     if os.path.isdir(opt.media_path):
#         files = util.Traversal(opt.media_path)
#     else:
#         files = [opt.media_path]
#     if opt.mode == 'add':
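# A small verification sketch (assumed, not in the original script): reload the
# exported ScriptModule with torch.jit.load and run the same example input to
# confirm the file is loadable outside the tracing session.
import torch

loaded = torch.jit.load(os.path.join(savedir, 'mosaic_position.pt'))
loaded.eval()
with torch.no_grad():
    check = loaded(torch.ones((1, 3, 360, 360)))
print('reloaded output shape:', check.shape)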
                        help='if specified, save mask')
opt.parser.add_argument('--outsize', type=int, default=512, help='')
opt.parser.add_argument('--fold', type=int, default=1, help='')
opt.parser.add_argument('--start', type=int, default=0, help='')
opt.parser.add_argument('--minsize', type=int, default=128, help='when [square], minimal roi size')
opt.parser.add_argument('--quality', type=int, default=40, help='when [square], minimal quality')
opt = opt.getparse()

util.makedirs(opt.savedir)
util.writelog(os.path.join(opt.savedir, 'opt.txt'),
              str(time.asctime(time.localtime(time.time()))) + '\n' + util.opt2str(opt))
opt.mod = (opt.mod).split(',')

# save dirs
if opt.hd:
    train_A_path = os.path.join(opt.savedir, 'train_A')
    train_B_path = os.path.join(opt.savedir, 'train_B')
    util.makedirs(train_A_path)
    util.makedirs(train_B_path)
else:
    train_path = os.path.join(opt.savedir, 'train')
    util.makedirs(train_path)
if opt.savemask:
SamplingRate = 44100
IntervalTime = 0.03

# video2voice
if 'preprocess' in STEP or 'full' in STEP:
    util.clean_tempfiles(tmp_init=True)
    names = os.listdir(video_dir)
    for i, name in enumerate(names, 0):
        video_path = os.path.join(video_dir, name)
        ffmpeg.video2voice(video_path,
                           os.path.join('./tmp/video_voice', name.replace('mp4', 'wav')),
                           'wav')
        img_dir = os.path.join('./tmp/video_imgs', '%02d' % i)
        util.makedirs(img_dir)
        ffmpeg.video2image(video_path, os.path.join(img_dir, '%05d.jpg'))
    ffmpeg.video2voice(music_path, './tmp/music/music.wav', 'wav')

if 'matchtest' in STEP or 'generate_video' in STEP or 'full' in STEP:
    '''
    dst crop and get features
    '''
    print('loading...')
    names = os.listdir('./tmp/video_voice')
    names.sort()
    audios = []
    for name in names:
        path = os.path.join('./tmp/video_voice', name)
        sampling_freq, audio = wavfile.read(path)
import cv2
import os
from torchvision import transforms
from PIL import Image
import random
import sys
sys.path.append("..")
import util.image_processing as impro
from util import util, mosaic
import datetime
import shutil

mask_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/mask'
img_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/origin_image'
output_dir = './datasets_img'
util.makedirs(output_dir)

HD = True        # if False, make a dataset for pix2pix; if True, for pix2pix_HD
MASK = True      # if True, output the mask too
OUT_SIZE = 256
FOLD_NUM = 2
Bounding = False

if HD:
    train_A_path = os.path.join(output_dir, 'train_A')
    train_B_path = os.path.join(output_dir, 'train_B')
    util.makedirs(train_A_path)
    util.makedirs(train_B_path)
else:
    train_path = os.path.join(output_dir, 'train')
    util.makedirs(train_path)
if MASK: