def __init__(self,
             path_to_detector=None,
             path_to_anchor=None,
             verbose=False,
             min_score_thresh=0.5,
             min_suppression_threshold=0.3):
    """Create a BlazeFace detector and load its weights and anchors.

    Args:
        path_to_detector (str, optional): Weight file handed to
            ``paddle.load``. When None, the file behind
            ``blazeface_weights`` is used and ``path_to_anchor`` is ignored.
        path_to_anchor (str, optional): Anchor file handed to ``np.load``
            together with ``path_to_detector``. When None, the file behind
            ``blazeface_anchors`` is used.
        verbose: Forwarded to the base-class constructor.
        min_score_thresh (float): Stored on the detector as
            ``min_score_thresh``.
        min_suppression_threshold (float): Stored on the detector as
            ``min_suppression_threshold``.
    """
    super(BlazeFaceDetector, self).__init__(verbose)

    # Resolve which files to read before touching the network.
    if path_to_detector is None:
        weights_file = get_weights_path_from_url(blazeface_weights)
        anchors_file = get_weights_path_from_url(blazeface_anchors)
    else:
        weights_file = path_to_detector
        anchors_file = path_to_anchor
        if anchors_file is None:
            # np.load(None) would raise; custom weights without custom
            # anchors fall back to the published anchor file.
            anchors_file = get_weights_path_from_url(blazeface_anchors)

    model_weights = paddle.load(weights_file)
    model_anchors = np.load(anchors_file)

    self.face_detector = BlazeFace()
    self.face_detector.load_dict(model_weights)
    self.face_detector.load_anchors_from_npy(model_anchors)
    self.face_detector.min_score_thresh = min_score_thresh
    self.face_detector.min_suppression_threshold = min_suppression_threshold
    self.face_detector.eval()
def load_sample_audio_text():
    """Load the sample transcript and the sample audio clip.

    Returns:
        tuple: ``(x, text)`` where ``x`` is the result of ``load_audio`` on
        the file behind ``AUDIO_URL`` and ``text`` is the full content of
        the file behind ``TEXT_URL``.
    """
    transcript_file = download.get_weights_path_from_url(TEXT_URL)
    with open(transcript_file) as handle:
        transcript = handle.read()

    waveform = load_audio(download.get_weights_path_from_url(AUDIO_URL))
    return waveform, transcript
def from_pretrained(cls, config_name):
    """Build a model for a named config and load its published weights.

    Args:
        config_name (str): Must be one of ``CONFIG_NAMES``.

    Returns:
        An instance of ``cls`` built from the fetched config, with the
        fetched state dict loaded and ``eval()`` already called.

    Raises:
        AssertionError: If ``config_name`` is not in ``CONFIG_NAMES``.
    """
    assert config_name in CONFIG_NAMES, f'input config {config_name} incorrect, available configs: {CONFIG_NAMES}'

    # The weight file is fetched first, then the config it belongs to.
    weight_path = download.get_weights_path_from_url(
        URL_BASE + f'weights/{config_name}.pdparam')
    config_path = download.get_weights_path_from_url(
        URL_BASE + f'config/{config_name}.json')

    model = cls(PretrainedConfig.from_pretrained(config_path))
    model.load_dict(paddle.load(weight_path))
    model.eval()
    return model
def calculate_fid_given_paths(paths,
                              batch_size,
                              use_gpu,
                              dims,
                              model=None,
                              premodel_path=None,
                              style=None):
    """Compute the FID between the images found under two paths.

    Args:
        paths: Sequence whose first two entries are the locations to
            compare. Every entry must exist on disk.
        batch_size: Forwarded to ``_compute_statistics_of_path``.
        use_gpu: Forwarded to ``_compute_statistics_of_path``.
        dims: Feature dimensionality; also used to pick the InceptionV3
            block via ``InceptionV3.BLOCK_INDEX_BY_DIM``.
        model: Feature extractor to use. When None (and ``style`` is not
            ``'stargan'``) an ``InceptionV3`` is created.
        premodel_path (str, optional): Pretrained weight file. When None,
            the file behind ``FID_WEIGHTS_URL`` is resolved at call time.
        style: Forwarded to ``_compute_statistics_of_path``; the value
            ``'stargan'`` suppresses creation of the default model.

    Returns:
        The value returned by ``_calculate_frechet_distance``.

    Raises:
        AssertionError: If ``premodel_path`` does not exist.
        RuntimeError: If any entry of ``paths`` does not exist.
    """
    if premodel_path is None:
        # Resolved lazily: as a default-argument expression this ran once
        # at definition time, i.e. on every import of the module.
        premodel_path = get_weights_path_from_url(FID_WEIGHTS_URL)

    assert os.path.exists(
        premodel_path
    ), 'pretrain_model path {} is not exists! Please download it first'.format(
        premodel_path)

    for p in paths:
        if not os.path.exists(p):
            raise RuntimeError('Invalid path: %s' % p)

    if model is None and style != 'stargan':
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
        model = InceptionV3([block_idx], class_dim=1008)

    m1, s1 = _compute_statistics_of_path(paths[0], model, batch_size, dims,
                                         use_gpu, premodel_path, style)
    m2, s2 = _compute_statistics_of_path(paths[1], model, batch_size, dims,
                                         use_gpu, premodel_path, style)

    return _calculate_frechet_distance(m1, s1, m2, s2)
def set_paddle_model(self):
    """Return a ``ViT_base_patch16_224`` model with the published weights loaded."""
    weights_url = 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_224_pretrained.pdparams'

    vit = ViT_base_patch16_224()
    state = paddle.load(get_weights_path_from_url(weights_url))
    vit.set_dict(state)
    return vit
def __init__(self, args, cfg, output_path='output'):
    """Store the run settings and decide which weight file to use.

    Args:
        args: Object exposing ``model_path``; kept as ``self.args``.
        cfg: Kept as ``self.cfg``.
        output_path (str): Kept as ``self.output_path``.
    """
    self.args = args
    self.cfg = cfg

    # An explicit model path wins; otherwise use the file behind PS_WEIGHT_URL.
    self.weight_path = self.args.model_path
    no_custom_weights = self.weight_path is None
    if no_custom_weights:
        self.weight_path = get_weights_path_from_url(PS_WEIGHT_URL)

    self.output_path = output_path
def _yolov3_darknet(num_layers=53,
                    num_classes=80,
                    num_max_boxes=50,
                    model_mode='train',
                    pretrained=True):
    """Wrap a ``YOLOv3`` network in a ``paddle.Model``.

    Args:
        num_layers (int): Key into ``pretrain_infos`` selecting the
            pretrained weights; only consulted when ``pretrained`` is true.
        num_classes (int): Passed to ``YOLOv3``.
        num_max_boxes (int): Size of the box axis of the label specs.
        model_mode (str): Passed to ``YOLOv3``.
        pretrained (bool): Whether to load weights listed in
            ``pretrain_infos``.

    Returns:
        paddle.Model: The wrapped network, with weights loaded if requested.
    """
    image_id = InputSpec([None, 1], 'int64', name='img_id')
    image_shape = InputSpec([None, 2], 'int32', name='img_shape')
    image = InputSpec([None, 3, None, None], 'float32', name='image')

    gt_bbox = InputSpec([None, num_max_boxes, 4], 'float32', name='gt_bbox')
    gt_label = InputSpec([None, num_max_boxes], 'int32', name='gt_label')
    gt_score = InputSpec([None, num_max_boxes], 'float32', name='gt_score')

    model = paddle.Model(
        YOLOv3(num_classes, model_mode),
        [image_id, image_shape, image],
        [gt_bbox, gt_label, gt_score])

    if not pretrained:
        return model

    assert num_layers in pretrain_infos, \
        "YOLOv3-DarkNet{} do not have pretrained weights now, " \
        "pretrained should be set as False".format(num_layers)
    weight_path = get_weights_path_from_url(*pretrain_infos[num_layers])
    assert weight_path.endswith('.pdparams'), \
        "suffix of weight must be .pdparams"
    model.load(weight_path)
    return model
def __init__(self,
             generator,
             discriminator=None,
             syncnet_wt=1.0,
             max_eval_steps=700,
             is_train=True):
    """Initialize the Wav2lip model.

    Args:
        generator: Passed to ``build_generator`` to create ``netG``.
        discriminator: Passed to ``build_discriminator`` to create ``netD``;
            only used when ``is_train`` is true.
        syncnet_wt (float): Stored as ``self.syncnet_wt``.
        max_eval_steps (int): Stored as ``self.max_eval_steps``.
        is_train (bool): When true, ``netD`` is built and initialised from
            the file behind ``SYNCNET_WEIGHT_URL`` and an L1 reconstruction
            loss is created.
    """
    super(Wav2LipModel, self).__init__()

    self.syncnet_wt = syncnet_wt
    self.is_train = is_train
    self.max_eval_steps = max_eval_steps

    # Evaluation bookkeeping starts empty.
    self.eval_step = 0
    self.eval_sync_losses = []
    self.eval_recon_losses = []

    # The generator is always built; the discriminator and the
    # reconstruction loss exist only for training.
    generator_net = build_generator(generator)
    self.nets['netG'] = generator_net
    init_weights(self.nets['netG'], distribution='uniform')

    if not self.is_train:
        return

    self.nets['netD'] = build_discriminator(discriminator)
    syncnet_state = paddle.load(get_weights_path_from_url(SYNCNET_WEIGHT_URL))
    self.nets['netD'].load_dict(syncnet_state)
    self.recon_loss = paddle.nn.L1Loss()
def _vgg(arch: str, cfg: str, batch_norm: bool, pretrained: bool,
         **kwargs: Any) -> VGG:
    """Build a VGG variant, optionally initialised from published weights.

    Args:
        arch: Key into ``model_urls`` for the pretrained weights.
        cfg: Key into ``cfgs`` selecting the layer layout.
        batch_norm: Forwarded to ``make_layers``.
        pretrained: When true, ``init_weights`` is forced to False and the
            weights for ``arch`` are loaded into the model.
        **kwargs: Forwarded to the ``VGG`` constructor.

    Returns:
        The constructed ``VGG`` model.
    """
    if pretrained:
        # Weights are about to be overwritten, so skip random initialisation.
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        # Imported here so the block does not depend on a module-level import.
        import paddle

        # get_weights_path_from_url returns a file path, not a state dict;
        # the file has to be deserialised before it is given to load_dict.
        weight_path = get_weights_path_from_url(model_urls[arch])
        model.load_dict(paddle.load(weight_path))
    return model
def _resnet(arch: str, block: Type[Union[BasicBlock, Bottleneck]],
            layers: List[int], pretrained: bool, **kwargs: Any) -> ResNet:
    """Build a ResNet variant, optionally initialised from published weights.

    Args:
        arch: Key into ``model_urls`` for the pretrained weights.
        block: Residual block class passed to ``ResNet``.
        layers: Per-stage block counts passed to ``ResNet``.
        pretrained: When true, the weights for ``arch`` are loaded.
        **kwargs: Forwarded to the ``ResNet`` constructor.

    Returns:
        The constructed ``ResNet`` model.
    """
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        # Imported here so the block does not depend on a module-level import.
        import paddle

        # get_weights_path_from_url returns a file path, not a state dict;
        # the file has to be deserialised before it is given to load_dict.
        weight_path = get_weights_path_from_url(model_urls[arch])
        model.load_dict(paddle.load(weight_path))
    return model
def vgg16(pretrained=False):
    """Build a VGG model from the module-level ``cfg`` layout.

    Args:
        pretrained (bool): When true, load the weights registered under
            ``model_urls['vgg16']``.

    Returns:
        The constructed ``VGG`` model.
    """
    model = VGG(make_layers(cfg))
    if not pretrained:
        return model

    entry = model_urls['vgg16']
    weight_path = get_weights_path_from_url(entry[0], entry[1])
    model.load_dict(paddle.load(weight_path))
    return model
def _squeezenet(arch, version, pretrained, **kwargs):
    """Build a SqueezeNet and optionally load its registered weights.

    Args:
        arch (str): Key into ``model_urls``; checked only when
            ``pretrained`` is true.
        version: Passed to ``SqueezeNet``.
        pretrained (bool): Whether to load pretrained weights.
        **kwargs: Forwarded to ``SqueezeNet``.

    Returns:
        The constructed ``SqueezeNet`` model.
    """
    net = SqueezeNet(version, **kwargs)
    if not pretrained:
        return net

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    state = paddle.load(get_weights_path_from_url(entry[0], entry[1]))
    net.set_dict(state)
    return net
def __init__(self, config_name, pretrained=True):
    """Build the wav2vec2 backbone plus a linear head for a named config.

    Args:
        config_name (str): Must be one of ``CONFIG_NAMES``.
        pretrained (bool): When true, the matching weight file is loaded
            into this layer and the layer is put in eval mode.

    Raises:
        AssertionError: If ``config_name`` is not in ``CONFIG_NAMES``.
    """
    super().__init__()
    assert config_name in CONFIG_NAMES, f'input config {config_name} incorrect, available configs: {CONFIG_NAMES}'

    config_path = download.get_weights_path_from_url(
        URL_BASE + f'config/{config_name}.json')
    config = PretrainedConfig.from_pretrained(config_path)

    self.wav2vec2 = Wav2Vec2Model(config)
    self.dropout = nn.Dropout(config.final_dropout)
    self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
    self.config = config

    if not pretrained:
        return

    # NOTE(review): eval() is applied only on the pretrained path here —
    # confirm this matches the intended nesting.
    weight_path = download.get_weights_path_from_url(
        URL_BASE + f'weights/{config_name}.pdparam')
    self.load_dict(paddle.load(weight_path))
    self.eval()
def _resnet(arch, Block, depth, pretrained, **kwargs):
    """Build a ResNet and optionally load its registered weights.

    Args:
        arch (str): Key into ``model_urls``; checked only when
            ``pretrained`` is true.
        Block: Block class passed to ``ResNet``.
        depth: Passed to ``ResNet``.
        pretrained (bool): Whether to load pretrained weights.
        **kwargs: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` model.
    """
    net = ResNet(Block, depth, **kwargs)
    if not pretrained:
        return net

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    state = paddle.load(get_weights_path_from_url(entry[0], entry[1]))
    net.set_dict(state)
    return net
def _darknet(num_layers=53, input_channels=3, pretrained=True):
    """Build a DarkNet backbone and optionally load its registered weights.

    Args:
        num_layers (int): Passed to ``DarkNet`` and used as the key into
            ``pretrain_infos``.
        input_channels (int): Passed to ``DarkNet``.
        pretrained (bool): Whether to load pretrained weights.

    Returns:
        The constructed ``DarkNet`` model.
    """
    net = DarkNet(num_layers, input_channels)
    if not pretrained:
        return net

    assert num_layers in pretrain_infos, \
        "DarkNet{} do not have pretrained weights now, " \
        "pretrained should be set as False".format(num_layers)
    weight_path = get_weights_path_from_url(*pretrain_infos[num_layers])

    suffix = '.pdparams'
    assert weight_path.endswith(suffix), \
        "suffix of weight must be .pdparams"

    # load_dygraph is handed the path with the '.pdparams' suffix removed.
    path_without_suffix = weight_path[:-len(suffix)]
    weight_dict, _ = fluid.load_dygraph(path_without_suffix)
    net.set_dict(weight_dict)
    return net
def _mobilenet(arch, pretrained=False, **kwargs):
    """Build a MobileNetV2 and optionally load its registered weights.

    Args:
        arch (str): Key into ``model_urls``; checked only when
            ``pretrained`` is true.
        pretrained (bool): Whether to load pretrained weights.
        **kwargs: Forwarded to ``MobileNetV2``.

    Returns:
        The constructed ``MobileNetV2`` model.
    """
    net = MobileNetV2(**kwargs)
    if not pretrained:
        return net

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    state = paddle.load(get_weights_path_from_url(entry[0], entry[1]))
    net.load_dict(state)
    return net
def _darknet(depth=53, pretrained=True):
    """Build a DarkNet backbone and optionally load its registered weights.

    Args:
        depth (int): Passed to ``DarkNet`` and used as the key into
            ``pretrain_infos``.
        pretrained (bool): Whether to load pretrained weights.

    Returns:
        The constructed ``DarkNet`` model.

    Raises:
        AssertionError: If ``pretrained`` is true and ``depth`` has no entry
            in ``pretrain_infos``, or the resolved file does not end in
            ``.pdparams``.
    """
    model = DarkNet(depth)
    if pretrained:
        # The message is formatted with ``depth``; the previous code
        # referenced an undefined ``num_layers`` and raised NameError
        # instead of the intended AssertionError.
        assert depth in pretrain_infos.keys(), \
            "DarkNet{} do not have pretrained weights now, " \
            "pretrained should be set as False".format(depth)
        weight_path = get_weights_path_from_url(*(pretrain_infos[depth]))
        assert weight_path.endswith('.pdparams'), \
            "suffix of weight must be .pdparams"
        params = paddle.load(weight_path)
        model.load_dict(params)
    return model
def _resnet(arch, Block, depth, pretrained, **kwargs):
    """Build a ResNet and optionally load weights via ``fluid.load_dygraph``.

    Args:
        arch (str): Key into ``model_urls``; checked only when
            ``pretrained`` is true.
        Block: Block class passed to ``ResNet``.
        depth: Passed to ``ResNet``.
        pretrained (bool): Whether to load pretrained weights.
        **kwargs: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` model.
    """
    net = ResNet(Block, depth, **kwargs)
    if not pretrained:
        return net

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    weight_path = get_weights_path_from_url(entry[0], entry[1])
    assert weight_path.endswith(
        '.pdparams'), "suffix of weight must be .pdparams"

    # Only the first element of the loaded pair is used.
    loaded = fluid.load_dygraph(weight_path)
    state, _ = loaded
    net.set_dict(state)
    return net
def __init__(self, path_to_detector=None, verbose=False):
    """Create an S3FD face detector and load its weights.

    Args:
        path_to_detector (str, optional): Weight file handed to
            ``paddle.load``. When None, the file behind
            ``models_urls['s3fd']`` is used.
        verbose: Forwarded to the base-class constructor.
    """
    super(SFDDetector, self).__init__(verbose)

    # Pick the weight file: explicit path if given, published one otherwise.
    weights_file = path_to_detector
    if weights_file is None:
        weights_file = get_weights_path_from_url(models_urls['s3fd'])
    model_weights = paddle.load(weights_file)

    detector = s3fd()
    self.face_detector = detector
    self.face_detector.load_dict(model_weights)
    self.face_detector.eval()
def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
    """Build a VGG variant and optionally load its registered weights.

    Args:
        arch (str): Key into ``model_urls``; checked only when
            ``pretrained`` is true.
        cfg (str): Key into ``cfgs`` selecting the layer layout.
        batch_norm (bool): Forwarded to ``make_layers``.
        pretrained (bool): Whether to load pretrained weights.
        **kwargs: Forwarded to ``VGG``.

    Returns:
        The constructed ``VGG`` model.
    """
    features = make_layers(cfgs[cfg], batch_norm=batch_norm)
    net = VGG(features, **kwargs)
    if not pretrained:
        return net

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    state = paddle.load(get_weights_path_from_url(entry[0], entry[1]))
    net.load_dict(state)
    return net
def build_model(name="RN50"):
    """Build one of the supported models and load its published weights.

    Args:
        name (str): Must be in ``MODEL_NAMES``; selects both the builder
            function and the entry of ``URL`` to fetch.

    Returns:
        The built model with its state dict loaded, in eval mode.

    Raises:
        AssertionError: If ``name`` is not in ``MODEL_NAMES``.
    """
    assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}"

    builders = dict(
        RN101=build_rn101_model,
        VIT=build_vit_model,
        RN50=build_rn50_model,
    )
    build = builders[name]
    net = build()

    weight_file = download.get_weights_path_from_url(URL[name])
    net.load_dict(paddle.load(weight_file))
    net.eval()
    return net
def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
    """Wrap a ``TSM_ResNet`` network in a ``paddle.Model``.

    Args:
        num_layers (int): Passed to ``TSM_ResNet`` and used as the key into
            ``pretrain_infos``.
        seg_num (int): Passed to ``TSM_ResNet`` and used for the second axis
            of the image input spec.
        num_classes (int): Passed to ``TSM_ResNet``.
        pretrained (bool): Whether to load the weights listed in
            ``pretrain_infos``.

    Returns:
        paddle.Model: The wrapped network, with weights loaded if requested.
    """
    # The spec follows ``seg_num`` instead of a hard-coded 8 so that a
    # non-default segment count describes the input the network is built for.
    # assumes axis 1 of the image tensor is the segment axis — TODO confirm
    inputs = [
        InputSpec([None, seg_num, 3, 224, 224], 'float32', name='image')
    ]
    labels = [InputSpec([None, 1], 'int64', name='label')]
    net = TSM_ResNet(num_layers, seg_num, num_classes)
    model = paddle.Model(net, inputs, labels)
    if pretrained:
        assert num_layers in pretrain_infos.keys(), \
            "TSM-ResNet{} do not have pretrained weights now, " \
            "pretrained should be set as False".format(num_layers)
        weight_path = get_weights_path_from_url(*(pretrain_infos[num_layers]))
        assert weight_path.endswith('.pdparams'), \
            "suffix of weight must be .pdparams"
        model.load(weight_path)
    return model
def _mobilenet_v3(arch, pretrained=False, scale=1.0, **kwargs):
    """Build a MobileNetV3 (large or small) and optionally load weights.

    Args:
        arch (str): ``"mobilenet_v3_large"`` selects ``MobileNetV3Large``;
            any other value selects ``MobileNetV3Small``.
        pretrained (bool): Whether to load pretrained weights. The lookup
            key in ``model_urls`` is ``"{arch}_x{scale}"``.
        scale (float): Passed to the model constructor and used in the
            lookup key.
        **kwargs: Forwarded to the model constructor.

    Returns:
        The constructed model.
    """
    model_cls = (MobileNetV3Large
                 if arch == "mobilenet_v3_large" else MobileNetV3Small)
    net = model_cls(scale=scale, **kwargs)
    if not pretrained:
        return net

    # Pretrained files are registered per architecture *and* scale.
    arch = "{}_x{}".format(arch, scale)
    assert (
        arch in model_urls
    ), "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    state = paddle.load(get_weights_path_from_url(entry[0], entry[1]))
    net.set_dict(state)
    return net
def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
    """Build a 1000-class VGG variant, optionally loading weights via fluid.

    Args:
        arch (str): Key into ``model_urls``; checked only when
            ``pretrained`` is true.
        cfg (str): Key into ``cfgs`` selecting the layer layout.
        batch_norm (bool): Forwarded to ``make_layers``.
        pretrained (bool): Whether to load pretrained weights.
        **kwargs: Forwarded to ``VGG`` alongside ``num_classes=1000``.

    Returns:
        The constructed ``VGG`` model.
    """
    features = make_layers(cfgs[cfg], batch_norm=batch_norm)
    net = VGG(features, num_classes=1000, **kwargs)
    if not pretrained:
        return net

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    weight_path = get_weights_path_from_url(entry[0], entry[1])
    assert weight_path.endswith(
        '.pdparams'), "suffix of weight must be .pdparams"

    # Only the first element of the loaded pair is used.
    loaded = fluid.load_dygraph(weight_path)
    state, _ = loaded
    net.load_dict(state)
    return net
def bmn(tscale,
        dscale,
        feat_dim,
        prop_boundary_ratio,
        num_sample,
        num_sample_perbin,
        mode,
        pretrained=True):
    """Wrap a ``BMN`` network in a ``paddle.Model``.

    Args:
        tscale (int): Sequence length; last axis of the feature input and of
            the ground-truth specs.
        dscale (int): Max duration length; second axis of ``gt_iou_map``.
        feat_dim (int): Second axis of the feature input spec.
        prop_boundary_ratio (float): Ratio of expanded temporal region in
            proposal boundary; passed to ``BMN``.
        num_sample (int): Number of samples between the starting and ending
            boundary of each proposal; passed to ``BMN``.
        num_sample_perbin (int): Number of selected points in each sample;
            passed to ``BMN``.
        mode (str): One of ``'train'``, ``'test'`` or ``'infer'``; selects
            which label specs are attached to the model.
        pretrained (bool): If True, load the weights registered under
            ``pretrain_infos['bmn']``. Default True.

    Returns:
        paddle.Model: The wrapped network, with weights loaded if requested.
    """
    feat_input = InputSpec(
        [None, feat_dim, tscale], 'float32', name='feat_input')
    gt_iou_map = InputSpec(
        [None, dscale, tscale], 'float32', name='gt_iou_map')
    gt_start = InputSpec([None, tscale], 'float32', name='gt_start')
    gt_end = InputSpec([None, tscale], 'float32', name='gt_end')
    video_idx = InputSpec([None, 1], 'int64', name='video_idx')

    # Label specs per mode; an unknown mode raises KeyError on lookup.
    labels_by_mode = {
        'train': [gt_iou_map, gt_start, gt_end],
        'test': [gt_iou_map, gt_start, gt_end, video_idx],
        'infer': [video_idx]
    }
    labels = labels_by_mode[mode]

    net = BMN(tscale, dscale, prop_boundary_ratio, num_sample,
              num_sample_perbin)
    model = paddle.Model(net, [feat_input], labels)
    if not pretrained:
        return model

    weight_path = get_weights_path_from_url(*pretrain_infos['bmn'])
    assert weight_path.endswith('.pdparams'), \
        "suffix of weight must be .pdparams"
    model.load(weight_path)
    return model
def googlenet(pretrained=False, **kwargs):
    """Build the GoogLeNet (Inception v1) architecture.

    Reference: `"Going Deeper with Convolutions"
    <https://arxiv.org/pdf/1409.4842.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet.
        **kwargs: Forwarded to the ``GoogLeNet`` constructor.

    Returns:
        The constructed ``GoogLeNet`` model.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.models import googlenet

            # build model
            model = googlenet()

            # build model and load imagenet pretrained weight
            # model = googlenet(pretrained=True)

            x = paddle.rand([1, 3, 224, 224])
            out, out1, out2 = model(x)

            print(out.shape)
    """
    arch = "googlenet"
    net = GoogLeNet(**kwargs)
    if not pretrained:
        return net

    assert (
        arch in model_urls
    ), "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    state = paddle.load(get_weights_path_from_url(entry[0], entry[1]))
    net.set_dict(state)
    return net
def __init__(self,
             vocab_file=None,
             bos_token="<s>",
             eos_token="</s>",
             unk_token="<unk>",
             pad_token="<pad>",
             word_delimiter_token="|",
             do_lower_case=True,
             verbose=False):
    """Set up the special tokens and load the vocabulary.

    Args:
        vocab_file (str, optional): JSON vocabulary file. When None, the
            file behind ``URL_BASE + 'config/vocab.json'`` is used.
        bos_token (str): Stored as the beginning-of-sequence token.
        eos_token (str): Stored as the end-of-sequence token.
        unk_token (str): Stored as the unknown token.
        pad_token (str): Stored as the padding token.
        word_delimiter_token (str): Stored as the word delimiter token.
        do_lower_case (bool): Stored as ``self.do_lower_case``.
        verbose (bool): Stored as ``self.verbose``.
    """
    if vocab_file is None:
        vocab_file = download.get_weights_path_from_url(
            URL_BASE + 'config/vocab.json')

    # Special tokens supplied by the caller.
    self._bos_token = bos_token
    self._eos_token = eos_token
    self._unk_token = unk_token
    # Special tokens this tokenizer leaves unset.
    self._sep_token = None
    self._pad_token = pad_token
    self._cls_token = None
    self._mask_token = None
    self._pad_token_type_id = 0
    self._word_delimiter_token = word_delimiter_token
    self.verbose = verbose

    # Containers for tokens added after construction start empty.
    self._additional_special_tokens = []
    self.added_tokens_encoder: Dict[str, int] = {}
    self.added_tokens_decoder: Dict[int, str] = {}
    self.do_lower_case = do_lower_case

    with open(vocab_file, encoding="utf-8") as vocab_handle:
        self.encoder = json.load(vocab_handle)
    # The decoder is the inverse mapping of the loaded vocabulary.
    self.decoder = {index: token for token, index in self.encoder.items()}
def inception_v3(pretrained=False, **kwargs):
    """Build the InceptionV3 architecture.

    Reference: `"Rethinking the Inception Architecture for Computer Vision"
    <https://arxiv.org/pdf/1512.00567.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet.
        **kwargs: Forwarded to the ``InceptionV3`` constructor.

    Returns:
        The constructed ``InceptionV3`` model.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.models import inception_v3

            # build model
            model = inception_v3()

            # build model and load imagenet pretrained weight
            # model = inception_v3(pretrained=True)

            x = paddle.rand([1, 3, 299, 299])
            out = model(x)

            print(out.shape)
    """
    arch = "inception_v3"
    net = InceptionV3(**kwargs)
    if not pretrained:
        return net

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    entry = model_urls[arch]
    state = paddle.load(get_weights_path_from_url(entry[0], entry[1]))
    net.set_dict(state)
    return net
if __name__ == '__main__':
    # Evaluate a checkpoint on the validation loader and log the mean mAP.
    parser = argparse.ArgumentParser(description='Audioset inference')
    parser.add_argument('--device',
                        help='set the gpu device number',
                        type=int,
                        required=False,
                        default=0)
    parser.add_argument('--weight', type=str, required=False, default='')
    # Parse the real command line. parse_args([]) always produced the
    # defaults, so --device and --weight could never take effect.
    args = parser.parse_args()

    paddle.set_device('gpu:{}'.format(args.device))

    # NOTE(review): eval() executes whatever string the config holds. This is
    # only safe while the config is trusted; an explicit name -> class mapping
    # would remove the risk.
    ModelClass = eval(c['model_type'])
    model = ModelClass(pretrained=False,
                       num_classes=c['num_classes'],
                       dropout=c['dropout'])

    # Without an explicit --weight, use the file behind checkpoint_url.
    if args.weight.strip() == '':
        args.weight = download.get_weights_path_from_url(checkpoint_url)
    model.load_dict(paddle.load(args.weight))
    model.eval()

    _, val_loader = get_loader()
    logger.info('evaluating...')
    val_acc, val_preci, val_recall, mAP_scores = evaluate(
        0, val_loader, model, F.binary_cross_entropy_with_logits)
    avg_map = np.mean(mAP_scores)
    logger.info(f'average mAP: {avg_map}')
def run(self):
    """Lip-sync the face in ``args.face`` to ``args.audio``.

    Reads the frames (one image or a whole video), computes the mel
    spectrogram of the audio, runs ``Wav2Lip`` batch by batch, writes the
    patched frames to ``temp/result.avi`` and finally calls ffmpeg to mux
    audio and video into ``args.outfile``.

    Side effects:
        Creates the ``temp`` directory if needed. When the audio is not a
        ``.wav`` file it is converted to ``temp/temp.wav`` and
        ``self.args.audio`` is repointed to that file.

    Raises:
        ValueError: If ``args.face`` is not an existing file, or the mel
            spectrogram contains NaN values.
        RuntimeError: If the data generator produced no batch, so no output
            video was written.
    """
    if not os.path.isfile(self.args.face):
        raise ValueError(
            '--face argument must be a valid path to video/image file')

    # splitext copes with dotted directory/file names and with names that
    # have no extension, where split('.')[1] picked the wrong piece or
    # raised IndexError. The comparison is case-insensitive.
    face_ext = os.path.splitext(self.args.face)[1].lower()
    if face_ext in ('.jpg', '.png', '.jpeg'):
        full_frames = [cv2.imread(self.args.face)]
        fps = self.args.fps
    else:
        video_stream = cv2.VideoCapture(self.args.face)
        fps = video_stream.get(cv2.CAP_PROP_FPS)

        print('Reading video frames...')

        full_frames = []
        while True:
            still_reading, frame = video_stream.read()
            if not still_reading:
                video_stream.release()
                break
            if self.args.resize_factor > 1:
                frame = cv2.resize(
                    frame, (frame.shape[1] // self.args.resize_factor,
                            frame.shape[0] // self.args.resize_factor))

            if self.args.rotate:
                # The constant lives on the top-level cv2 module; the
                # cv2.cv2 alias is not available in every OpenCV build.
                frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

            # A crop bound of -1 means "up to the frame edge".
            y1, y2, x1, x2 = self.args.crop
            if x2 == -1:
                x2 = frame.shape[1]
            if y2 == -1:
                y2 = frame.shape[0]

            frame = frame[y1:y2, x1:x2]
            full_frames.append(frame)

    print("Number of frames available for inference: " +
          str(len(full_frames)))

    # Both intermediate files are written below temp/.
    os.makedirs('temp', exist_ok=True)

    if not self.args.audio.endswith('.wav'):
        print('Extracting raw audio...')
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed to ffmpeg verbatim.
        subprocess.call([
            'ffmpeg', '-y', '-i', self.args.audio, '-strict', '-2',
            'temp/temp.wav'
        ])
        self.args.audio = 'temp/temp.wav'

    wav = audio.load_wav(self.args.audio, 16000)
    mel = audio.melspectrogram(wav)
    print(mel.shape)

    if np.isnan(mel.reshape(-1)).sum() > 0:
        raise ValueError(
            'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again'
        )

    # Cut the spectrogram into one window of mel_step_size columns per
    # video frame; the last window is aligned to the end of the audio.
    mel_chunks = []
    mel_idx_multiplier = 80. / fps
    i = 0
    while True:
        start_idx = int(i * mel_idx_multiplier)
        if start_idx + mel_step_size > len(mel[0]):
            mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:])
            break
        mel_chunks.append(mel[:, start_idx:start_idx + mel_step_size])
        i += 1

    print("Length of mel chunks: {}".format(len(mel_chunks)))

    full_frames = full_frames[:len(mel_chunks)]

    batch_size = self.args.wav2lip_batch_size
    gen = self.datagen(full_frames.copy(), mel_chunks)

    model = Wav2Lip()
    if self.args.checkpoint_path is None:
        model_weights_path = get_weights_path_from_url(WAV2LIP_WEIGHT_URL)
        weights = paddle.load(model_weights_path)
    else:
        weights = paddle.load(self.args.checkpoint_path)
    model.load_dict(weights)
    model.eval()
    print("Model loaded")

    out = None  # created on the first batch, once the frame size is known
    total_batches = int(np.ceil(float(len(mel_chunks)) / batch_size))
    for i, (img_batch, mel_batch, frames,
            coords) in enumerate(tqdm(gen, total=total_batches)):
        if i == 0:
            frame_h, frame_w = full_frames[0].shape[:-1]
            out = cv2.VideoWriter('temp/result.avi',
                                  cv2.VideoWriter_fourcc(*'DIVX'), fps,
                                  (frame_w, frame_h))

        img_batch = paddle.to_tensor(np.transpose(
            img_batch, (0, 3, 1, 2))).astype('float32')
        mel_batch = paddle.to_tensor(np.transpose(
            mel_batch, (0, 3, 1, 2))).astype('float32')

        with paddle.no_grad():
            pred = model(mel_batch, img_batch)

        pred = pred.numpy().transpose(0, 2, 3, 1) * 255.

        # Paste every predicted patch back into its source frame.
        for p, f, c in zip(pred, frames, coords):
            y1, y2, x1, x2 = c
            p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))

            f[y1:y2, x1:x2] = p
            out.write(f)

    if out is None:
        # Previously this surfaced as a NameError on ``out.release()``.
        raise RuntimeError(
            'No frames were generated, so no output video was written')
    out.release()

    subprocess.call([
        'ffmpeg', '-y', '-i', self.args.audio, '-i', 'temp/result.avi',
        '-strict', '-2', '-q:v', '1', self.args.outfile
    ])