def train(pr, gpus, restore = False, restore_opt = True, num_gpus = None, profile = False):
    """Train the model described by the params object `pr`.

    pr          -- experiment/params object (net_type, init_path, init_from_2d, ...)
    gpus        -- GPU ids to train on; filtered through tfu.set_gpus
    restore     -- resume from this experiment's own checkpoint
    restore_opt -- when resuming, also restore optimizer state
    num_gpus    -- optional cap on the number of GPUs actually used
    profile     -- passed through to Model for profiling
    """
    print pr
    gpus = tfu.set_gpus(gpus)
    with tf.Graph().as_default():
        config = tf.ConfigProto(allow_soft_placement = True)
        sess = tf.InteractiveSession(config = config)
        # Optionally restrict to the first num_gpus devices (None keeps all).
        gpus = gpus[:num_gpus]
        model = Model(pr, sess, gpus, profile = profile)
        model.make_train_model()
        if restore:
            # Resume training from this experiment's checkpoint.
            model.restore(restore_opt = restore_opt)
        elif pr.init_path is not None:
            init_ops = []
            if pr.net_type == 'i3d':
                # Warm-start the I3D RGB tower from a checkpoint, skipping
                # optimizer slot variables (Adam/Momentum accumulators).
                opt_names = ['Adam', 'beta1_power', 'beta2_power', 'Momentum']
                rgb_variable_map = {}
                for variable in tf.global_variables():
                    if any(x in variable.name for x in opt_names):
                        print 'Skipping:', variable.name
                        continue
                    if pr.init_from_2d:
                        # "Inflate" 2D Inception-V1 weights into the 3D conv
                        # variables: map the 3D variable name back to its 2D
                        # checkpoint name, restore the 2D tensor into a fresh
                        # variable, then tile it along a new temporal axis,
                        # scaled so activations keep their magnitude.
                        if variable.name.split('/')[0] == 'RGB':
                            cp_name = (
                                variable.name
                                .replace('RGB/inception_i3d', 'InceptionV1')
                                .replace('Conv3d', 'Conv2d')
                                .replace('batch_norm', 'BatchNorm')
                                .replace('conv_3d/w', 'weights')
                                .replace(':0', ''))
                            print 'shape of', variable.name, shape(variable)
                            # 2D variable: the 3D shape minus its leading
                            # (temporal) dimension.
                            v = tf.get_variable(cp_name, shape(variable)[1:], tf.float32)
                            rgb_variable_map[cp_name] = v
                            n = shape(v, 0)
                            # NOTE(review): n is the 2D kernel's first spatial
                            # dim, while I3D inflation normally tiles/divides
                            # by the 3D kernel's temporal length; these agree
                            # only for cubic kernels -- confirm.
                            init_ops.append(variable.assign(1.0/float(n) * tf.tile(ed(v, 0), (n, 1, 1, 1, 1))))
                    else:
                        # 3D checkpoint: restore RGB-tower variables directly.
                        if variable.name.split('/')[0] == 'RGB':
                            rgb_variable_map[variable.name.replace(':0', '')] = variable
                rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
                rgb_saver.restore(sess, pr.init_path)
                # Run the inflation assignments only after the 2D weights
                # have been restored.
                for x in init_ops:
                    print 'Running:', x
                    sess.run(x)
            else:
                print 'Restoring from init_path:', pr.init_path
                model.restore(pr.init_path, ul_only = True, restore_opt = False)
        # Freeze the graph so training cannot accidentally add ops.
        tf.get_default_graph().finalize()
        model.train()
def find_best_iter(pr, gpu, num_iters=10, sample_rate=10, dset_name='val'):
    """Score the most recent checkpoints and record the best one.

    Evaluates (up to) the 5 newest 'slow' checkpoints of experiment `pr` on a
    deterministic subsample of the train/val pair data, and writes the path
    of the checkpoint with the lowest loss to <pr.resdir>/model_path.txt.

    pr          -- experiment/params object (train_dir, loss_types, resdir, ...)
    gpu         -- GPU id to evaluate on
    num_iters   -- unused; kept for call compatibility
    sample_rate -- evaluate roughly 1/sample_rate of the examples
    dset_name   -- 'train' or 'val'; anything else raises RuntimeError
    """
    [gpu] = mu.set_gpus([gpu])
    # (loss, path) tuples compare lexicographically, so min() picks the
    # lowest loss; start at +inf so any real result wins.
    best_iter = (np.inf, '')
    # Checkpoints sorted by the iteration number parsed from
    # 'net*-<iter>.index'; keep only the 5 most recent.
    model_paths = sorted(
        ut.glob(pj(pr.train_dir, 'slow', 'net*.index')),
        key=lambda x: int(x.split('-')[-1].split('.')[0]))[-5:]
    model_paths = list(reversed(model_paths))
    assert len(model_paths), 'no model paths at %s' % pj(pr.train_dir, 'slow', 'net*.index')
    for model_path in model_paths:
        # Strip '.index' to get the checkpoint prefix TF expects.
        model_path = model_path.split('.index')[0]
        print model_path
        clf = NetClf(pr, model_path, gpu=gpu)
        clf.init()
        if dset_name == 'train':
            print 'train'
            tf_files = sorted(ut.glob(pj(pr.train_list, '*.tf')))
        elif dset_name == 'val':
            tf_files = sorted(ut.glob(pj(pr.val_list, '*.tf')))
        else:
            raise RuntimeError()
        import sep_eval
        losses = []
        for ims, _, pair in sep_eval.pair_data(tf_files, pr):
            # Deterministically subsample examples by hashing the
            # ground-truth YouTube id.
            if abs(hash(pair['ytid_gt'])) % sample_rate == 0:
                res = clf.predict_unmixed(ims, pair['samples_gt'], pair['samples_bg'])
                loss = 0.
                if 'pit' in pr.loss_types:
                    # Permutation-invariant loss over fg/bg spectrogram pairs.
                    loss += pit_loss([res['spec0']], [res['spec1']], [res['spec_pred_fg']], [res['spec_pred_bg']], pr)
                if 'fg-bg' in pr.loss_types:
                    # L1 spectrogram error for each source separately.
                    loss += np.mean(np.abs(res['spec_pred_fg'] - res['spec0']))
                    loss += np.mean(np.abs(res['spec_pred_bg'] - res['spec1']))
                losses.append(loss)
                print 'running:', np.mean(losses)
        loss = np.mean(losses)
        print model_path, 'Loss:', loss
        best_iter = min(best_iter, (loss, model_path))
    ut.write_lines(pj(pr.resdir, 'model_path.txt'), [best_iter[1]])
def train(pr, gpus, restore=False, restore_opt=True): print pr gpus = mu.set_gpus(gpus) with tf.Graph().as_default(): config = tf.ConfigProto(allow_soft_placement=True) #config = tf.ConfigProto() sess = tf.InteractiveSession(config=config) model = Model(pr, sess, gpus) model.make_model() if restore: model.restore(restore_opt=restore_opt) elif pr.init_path is not None: model.restore(pr.init_path, restore_resnet18_blocks=False, restore_opt=False) tf.get_default_graph().finalize() model.train()
def train(pr, gpus, restore=False, restore_opt=True, profile=False): print pr gpus = mu.set_gpus(gpus) with tf.Graph().as_default(): config = tf.ConfigProto(allow_soft_placement=True) sess = tf.InteractiveSession(config=config) model = Model(pr, sess, gpus, profile=profile) model.make_model() if restore: model.restore(restore_opt=restore_opt) elif pr.init_path is not None: if pr.init_type in ['shift', 'sep']: model.restore(pr.init_path, restore_opt=False, init_type=pr.init_type) elif pr.init_type == 'i3d': opt_names = ['Adam', 'beta1_power', 'beta2_power', 'Momentum'] rgb_variable_map = {} for variable in tf.global_variables(): if any(x in variable.name for x in opt_names): print 'Skipping:', variable.name continue if variable.name.split('/')[0] == 'RGB': rgb_variable_map[variable.name.replace(':0', '')] = variable print 'Restoring:', variable.name rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True) rgb_saver.restore(sess, pr.init_path) elif pr.init_type == 'scratch': pass else: raise RuntimeError() tf.get_default_graph().finalize() model.train()
# NOTE(review): fragment -- this chunk begins mid-function (the parser
# construction is above this view) and its final statement is cut off
# mid-expression. It duplicates the option-processing logic of main().
arg.add_argument('--suffix', type=str, default='')
arg.add_argument('--max_full_height', type=int, default=600)
arg = arg.parse_args()
arg.fullres = arg.fullres or arg.cam
# A negative --gpu means "run on CPU".
if arg.gpu < 0:
    arg.gpu = None
print 'Start time:', arg.start
print 'GPU =', arg.gpu
gpus = [arg.gpu]
gpus = mu.set_gpus(gpus)
if arg.duration_mult is not None:
    # Derive the clip duration from the spectrogram geometry of the 'full'
    # model (frame step/length are in milliseconds).
    pr = sep_params.full()
    step = 0.001 * pr.frame_step_ms
    length = 0.001 * pr.frame_length_ms
    arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult
# Look up the params constructor for the requested model variant by name.
fn = getattr(sep_params, arg.model)
pr = fn(vid_dur=arg.clip_dur)
if arg.clip_dur is None:
    arg.clip_dur = pr.vid_dur
pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
print 'Spectrogram samples:', pr.spec_len
pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name,
def main(args): arg = argparse.ArgumentParser( description='Separate on- and off-screen audio from a video') arg.add_argument('vid_file', type=str, help='Video file to process') arg.add_argument( '--duration_mult', type=float, default=None, help= 'Multiply the default duration of the audio (i.e. %f) by this amount. Should be a power of 2.' % sep_params.VidDur) arg.add_argument( '--mask', type=str, default=None, help= "set to 'l' or 'r' to visually mask the left/right half of the video before processing" ) arg.add_argument('--start', type=float, default=0., help='How many seconds into the video to start') arg.add_argument( '--model', type=str, default='full', help='Which variation of othe source separation model to run.') arg.add_argument('--gpu', type=int, default=0, help='Set to -1 for no GPU') arg.add_argument('--out', type=str, default=None, help='Directory to save videos') arg.add_argument('--cam', dest='cam', default=False, action='store_true') # undocumented/deprecated options arg.add_argument('--clip_dur', type=float, default=None) arg.add_argument('--duration', type=float, default=None) arg.add_argument('--fullres', type=bool, default=True) arg.add_argument('--suffix', type=str, default='') arg.add_argument('--max_full_height', type=int, default=600) arg = arg.parse_args(args) arg.fullres = arg.fullres or arg.cam if arg.gpu < 0: arg.gpu = None print 'Start time:', arg.start print 'GPU =', arg.gpu gpus = [arg.gpu] gpus = mu.set_gpus(gpus) if arg.duration_mult is not None: pr = sep_params.full() step = 0.001 * pr.frame_step_ms length = 0.001 * pr.frame_length_ms arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult fn = getattr(sep_params, arg.model) pr = fn(vid_dur=arg.clip_dur) if arg.clip_dur is None: arg.clip_dur = pr.vid_dur pr.input_rms = np.sqrt(0.1**2 + 0.1**2) print 'Spectrogram samples:', pr.spec_len pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name, pr.train_iters) if not os.path.exists(arg.vid_file): print 'Does not 
exist:', arg.vid_file sys.exit(1) if arg.duration is None: arg.duration = arg.clip_dur + 0.01 print arg.duration, arg.clip_dur full_dur = arg.duration step_dur = arg.clip_dur / 2. filled = np.zeros(int(np.ceil(full_dur * pr.samp_sr)), 'bool') full_samples_fg = np.zeros(filled.shape, 'float32') full_samples_bg = np.zeros(filled.shape, 'float32') full_samples_src = np.zeros(filled.shape, 'float32') arg.start = ut.make_mod(arg.start, (1. / pr.fps)) ts = np.arange(arg.start, arg.start + full_dur - arg.clip_dur, step_dur) full_ims = [None] * int(np.ceil(full_dur * pr.fps)) # Process each video chunk for t in ut.time_est(ts): t = ut.make_mod(t, (1. / pr.fps)) frame_start = int(t * pr.fps - arg.start * pr.fps) ret = run(arg.vid_file, t, arg.clip_dur, pr, gpus[0], mask=arg.mask, arg=arg) if ret is None: continue ims = ret['ims'] for frame, im in zip(xrange(frame_start, frame_start + len(ims)), ims): full_ims[frame] = im samples_fg = ret['samples_pred_fg'][:, 0] samples_bg = ret['samples_pred_bg'][:, 0] samples_src = ret['samples_src'][:, 0] samples_src = samples_src[:samples_bg.shape[0]] sample_start = int(round((t - arg.start) * pr.samp_sr)) n = samples_src.shape[0] inds = np.arange(sample_start, sample_start + n) ok = ~filled[inds] full_samples_fg[inds[ok]] = samples_fg[ok] full_samples_bg[inds[ok]] = samples_bg[ok] full_samples_src[inds[ok]] = samples_src[ok] filled[inds] = True full_samples_fg = np.clip(full_samples_fg, -1., 1.) full_samples_bg = np.clip(full_samples_bg, -1., 1.) full_samples_src = np.clip(full_samples_src, -1., 1.) 
full_ims = [x for x in full_ims if x is not None] table = [['start =', arg.start], 'fg:', imtable.Video(full_ims, pr.fps, Sound(full_samples_fg, pr.samp_sr)), 'bg:', imtable.Video(full_ims, pr.fps, Sound(full_samples_bg, pr.samp_sr)), 'src:', imtable.Video(full_ims, pr.fps, Sound(full_samples_src, pr.samp_sr))] # Write videos if arg.out is not None: ut.mkdir(arg.out) vid_s = arg.vid_file.split('/')[-1].split('.mp4')[0] mask_s = '' if arg.mask is None else '_%s' % arg.mask cam_s = '' if not arg.cam else '_cam' suffix_s = '' if arg.suffix == '' else '_%s' % arg.suffix name = '%s%s%s_%s' % (suffix_s, mask_s, cam_s, vid_s) def snd(x): x = Sound(x, pr.samp_sr) x.samples = np.clip(x.samples, -1., 1.) return x print 'Writing to:', arg.out ut.save(pj(arg.out, 'ret%s.pk' % name), ret) ut.make_video(full_ims, pr.fps, pj(arg.out, 'fg%s.mp4' % name), snd(full_samples_fg)) ut.make_video(full_ims, pr.fps, pj(arg.out, 'bg%s.mp4' % name), snd(full_samples_bg)) ut.make_video(full_ims, pr.fps, pj(arg.out, 'src%s.mp4' % name), snd(full_samples_src)) else: print 'Not writing, since --out was not set' print 'Video results:' ig.show(table) return 'fg%s.mp4' % name, 'bg%s.mp4' % name
# NOTE(review): fragment -- this chunk begins mid-scope: `Arg` and
# `vid_file` are defined elsewhere. It hard-codes (as class attributes) the
# options that main() normally reads from the command line, then repeats
# main()'s model-setup logic. Mixed print styles suggest a Python 3 port.
Arg.out = r'E:\Avinash\miscellaneous\project\av_segmentation\multisensory\results'
Arg.cam = True
Arg.adapt_cam_thresh = True
Arg.max_cam_thresh = 35
Arg.clip_dur = None
Arg.duration = None
Arg.fullres = True
Arg.suffix = ''
Arg.max_full_height = 600
Arg.fullres = Arg.fullres or Arg.cam
# A negative gpu id means "run on CPU".
if Arg.gpu < 0:
    Arg.gpu = None
arg = Arg(vid_file)
gpus = mu.set_gpus([arg.gpu])
if arg.duration_mult is not None:
    # Derive the clip duration from the spectrogram geometry of the 'full'
    # model (frame step/length are in milliseconds).
    pr = sep_params.full()
    step = 0.001 * pr.frame_step_ms
    length = 0.001 * pr.frame_length_ms
    arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult
# Look up the params constructor for the requested model variant by name.
fn = getattr(sep_params, arg.model)
pr = fn(vid_dur=arg.clip_dur)
if arg.clip_dur is None:
    arg.clip_dur = pr.vid_dur
pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
print('Spectrogram samples:', pr.spec_len)
pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name, pr.train_iters)